1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable-msg=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
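
  A minimal usage sketch (C{OpHypothetical} stands in for a real opcode class
  and is not defined in this module)::

    # From an LU's Exec(): submit one follow-up job made of a single opcode,
    # and return an extra value alongside the resulting job IDs.
    return ResultWithJobs([[opcodes.OpHypothetical()]], extra="value")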
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcode.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
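
  A minimal skeleton following the rules above (the opcode, hook path and
  logic are hypothetical)::

    class LUHypotheticalOp(LogicalUnit):
      HPATH = "hypothetical-op"
      HTYPE = constants.HTYPE_CLUSTER
      REQ_BGL = False

      def ExpandNames(self):
        self.needed_locks = {}

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Nothing to do")

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}

      def BuildHooksNodes(self):
        return ([], [self.cfg.GetMasterNode()])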
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable-msg=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
137
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensure
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separate is better because:
160

161
      - ExpandNames is left as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possibly
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods need no longer worry about missing parameters.
167

168
    """
169
    pass
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
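      # Share (rather than exclusively acquire) the locks at the node level
      self.share_locks[locking.LEVEL_NODE] = 1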
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
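
    A typical implementation (assuming self.recalculate_locks was set up in
    ExpandNames) recalculates the node locks once the instance locks are
    held, using L{_LockInstancesNodes}::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()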
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. No nodes should be returned as an
297
      empty list (and not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged, but any LU can override it if it
311
    wants to use the local cluster hook-scripts somehow.
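
    A minimal sketch of an override (hypothetical; a real LU would inspect
    C{hook_results} in more detail)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          feedback_fn("Post-execution hooks have run")
        return lu_result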
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the unused argument and the
324
    # 'could be a function' warnings
325
    # pylint: disable-msg=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
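
    Typical usage from an LU's ExpandNames (a sketch; the node lock
    declaration that usually follows is illustrative)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()
        self.needed_locks[locking.LEVEL_NODE] = []
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE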
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to just lock some instance's nodes, or
359
    to just lock primaries or secondary nodes, if needed.
360

361
    It should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we've really been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
391

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
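
  A minimal sketch of a tasklet (the name and logic are hypothetical)::

    class HypotheticalTasklet(Tasklet):
      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Nothing to do")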
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
558

    
559

    
560
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561
  """Checks if the owned node groups are still correct for an instance.
562

563
  @type cfg: L{config.ConfigWriter}
564
  @param cfg: The cluster configuration
565
  @type instance_name: string
566
  @param instance_name: Instance name
567
  @type owned_groups: set or frozenset
568
  @param owned_groups: List of currently owned node groups
569

570
  """
571
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
572

    
573
  if not owned_groups.issuperset(inst_groups):
574
    raise errors.OpPrereqError("Instance %s's node groups changed since"
575
                               " locks were acquired, current groups are"
576
                               " are '%s', owning groups '%s'; retry the"
577
                               " operation" %
578
                               (instance_name,
579
                                utils.CommaJoin(inst_groups),
580
                                utils.CommaJoin(owned_groups)),
581
                               errors.ECODE_STATE)
582

    
583
  return inst_groups
584

    
585

    
586
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587
  """Checks if the instances in a node group are still correct.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type group_uuid: string
592
  @param group_uuid: Node group UUID
593
  @type owned_instances: set or frozenset
594
  @param owned_instances: List of currently owned instances
595

596
  """
597
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598
  if owned_instances != wanted_instances:
599
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
600
                               " locks were acquired, wanted '%s', have '%s';"
601
                               " retry the operation" %
602
                               (group_uuid,
603
                                utils.CommaJoin(wanted_instances),
604
                                utils.CommaJoin(owned_instances)),
605
                               errors.ECODE_STATE)
606

    
607
  return wanted_instances
608

    
609

    
610
def _SupportsOob(cfg, node):
611
  """Tells if node supports OOB.
612

613
  @type cfg: L{config.ConfigWriter}
614
  @param cfg: The cluster configuration
615
  @type node: L{objects.Node}
616
  @param node: The node
617
  @return: The OOB script if supported or an empty string otherwise
618

619
  """
620
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
621

    
622

    
623
def _GetWantedNodes(lu, nodes):
624
  """Returns list of checked and expanded node names.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type nodes: list
629
  @param nodes: list of node names or None for all nodes
630
  @rtype: list
631
  @return: the list of nodes, sorted
632
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
633

634
  """
635
  if nodes:
636
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
637

    
638
  return utils.NiceSort(lu.cfg.GetNodeList())
639

    
640

    
641
def _GetWantedInstances(lu, instances):
642
  """Returns list of checked and expanded instance names.
643

644
  @type lu: L{LogicalUnit}
645
  @param lu: the logical unit on whose behalf we execute
646
  @type instances: list
647
  @param instances: list of instance names or None for all instances
648
  @rtype: list
649
  @return: the list of instances, sorted
650
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
651
  @raise errors.OpPrereqError: if any of the passed instances is not found
652

653
  """
654
  if instances:
655
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
656
  else:
657
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
658
  return wanted
659

    
660

    
661
def _GetUpdatedParams(old_params, update_dict,
662
                      use_default=True, use_none=False):
663
  """Return the new version of a parameter dictionary.
664

665
  @type old_params: dict
666
  @param old_params: old parameters
667
  @type update_dict: dict
668
  @param update_dict: dict containing new parameter values, or
669
      constants.VALUE_DEFAULT to reset the parameter to its default
670
      value
671
  @type use_default: boolean
672
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673
      values as 'to be deleted' values
674
  @type use_none: boolean
675
  @param use_none: whether to recognise C{None} values as 'to be
676
      deleted' values
677
  @rtype: dict
678
  @return: the new parameter dictionary
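
  Example of the resulting semantics (a sketch)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"a": constants.VALUE_DEFAULT, "c": 3})
    # returns {"b": 2, "c": 3}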
679

680
  """
681
  params_copy = copy.deepcopy(old_params)
682
  for key, val in update_dict.iteritems():
683
    if ((use_default and val == constants.VALUE_DEFAULT) or
684
        (use_none and val is None)):
685
      try:
686
        del params_copy[key]
687
      except KeyError:
688
        pass
689
    else:
690
      params_copy[key] = val
691
  return params_copy
692

    
693

    
694
def _ReleaseLocks(lu, level, names=None, keep=None):
695
  """Releases locks owned by an LU.
696

697
  @type lu: L{LogicalUnit}
698
  @param level: Lock level
699
  @type names: list or None
700
  @param names: Names of locks to release
701
  @type keep: list or None
702
  @param keep: Names of locks to retain
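
  Example (a sketch; C{node_name} stands for the name of a node lock that
  should be kept)::

    _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[node_name])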
703

704
  """
705
  assert not (keep is not None and names is not None), \
706
         "Only one of the 'names' and the 'keep' parameters can be given"
707

    
708
  if names is not None:
709
    should_release = names.__contains__
710
  elif keep:
711
    should_release = lambda name: name not in keep
712
  else:
713
    should_release = None
714

    
715
  if should_release:
716
    retain = []
717
    release = []
718

    
719
    # Determine which locks to release
720
    for name in lu.owned_locks(level):
721
      if should_release(name):
722
        release.append(name)
723
      else:
724
        retain.append(name)
725

    
726
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
727

    
728
    # Release just some locks
729
    lu.glm.release(level, names=release)
730

    
731
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
732
  else:
733
    # Release everything
734
    lu.glm.release(level)
735

    
736
    assert not lu.glm.is_owned(level), "No locks should be owned"
737

    
738

    
739
def _MapInstanceDisksToNodes(instances):
740
  """Creates a map from (node, volume) to instance name.
741

742
  @type instances: list of L{objects.Instance}
743
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
744

745
  """
746
  return dict(((node, vol), inst.name)
747
              for inst in instances
748
              for (node, vols) in inst.MapLVsByNode().items()
749
              for vol in vols)
750

    
751

    
752
def _RunPostHook(lu, node_name):
753
  """Runs the post-hook for an opcode on a single node.
754

755
  """
756
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
757
  try:
758
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
759
  except:
760
    # pylint: disable-msg=W0702
761
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
762

    
763

    
764
def _CheckOutputFields(static, dynamic, selected):
765
  """Checks whether all selected fields are valid.
766

767
  @type static: L{utils.FieldSet}
768
  @param static: static fields set
769
  @type dynamic: L{utils.FieldSet}
770
  @param dynamic: dynamic fields set
771

772
  """
773
  f = utils.FieldSet()
774
  f.Extend(static)
775
  f.Extend(dynamic)
776

    
777
  delta = f.NonMatching(selected)
778
  if delta:
779
    raise errors.OpPrereqError("Unknown output fields selected: %s"
780
                               % ",".join(delta), errors.ECODE_INVAL)
781

    
782

    
783
def _CheckGlobalHvParams(params):
784
  """Validates that given hypervisor params are not global ones.
785

786
  This will ensure that instances don't get customised versions of
787
  global params.
788

789
  """
790
  used_globals = constants.HVC_GLOBALS.intersection(params)
791
  if used_globals:
792
    msg = ("The following hypervisor parameters are global and cannot"
793
           " be customized at instance level, please modify them at"
794
           " cluster level: %s" % utils.CommaJoin(used_globals))
795
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
796

    
797

    
798
def _CheckNodeOnline(lu, node, msg=None):
799
  """Ensure that a given node is online.
800

801
  @param lu: the LU on behalf of which we make the check
802
  @param node: the node to check
803
  @param msg: if passed, should be a message to replace the default one
804
  @raise errors.OpPrereqError: if the node is offline
805

806
  """
807
  if msg is None:
808
    msg = "Can't use offline node"
809
  if lu.cfg.GetNodeInfo(node).offline:
810
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
811

    
812

    
813
def _CheckNodeNotDrained(lu, node):
814
  """Ensure that a given node is not drained.
815

816
  @param lu: the LU on behalf of which we make the check
817
  @param node: the node to check
818
  @raise errors.OpPrereqError: if the node is drained
819

820
  """
821
  if lu.cfg.GetNodeInfo(node).drained:
822
    raise errors.OpPrereqError("Can't use drained node %s" % node,
823
                               errors.ECODE_STATE)
824

    
825

    
826
def _CheckNodeVmCapable(lu, node):
827
  """Ensure that a given node is vm capable.
828

829
  @param lu: the LU on behalf of which we make the check
830
  @param node: the node to check
831
  @raise errors.OpPrereqError: if the node is not vm capable
832

833
  """
834
  if not lu.cfg.GetNodeInfo(node).vm_capable:
835
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
836
                               errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeHasOS(lu, node, os_name, force_variant):
840
  """Ensure that a node supports a given OS.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @param os_name: the OS to query about
845
  @param force_variant: whether to ignore variant errors
846
  @raise errors.OpPrereqError: if the node is not supporting the OS
847

848
  """
849
  result = lu.rpc.call_os_get(node, os_name)
850
  result.Raise("OS '%s' not in supported OS list for node %s" %
851
               (os_name, node),
852
               prereq=True, ecode=errors.ECODE_INVAL)
853
  if not force_variant:
854
    _CheckOSVariant(result.payload, os_name)
855

    
856

    
857
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858
  """Ensure that a node has the given secondary ip.
859

860
  @type lu: L{LogicalUnit}
861
  @param lu: the LU on behalf of which we make the check
862
  @type node: string
863
  @param node: the node to check
864
  @type secondary_ip: string
865
  @param secondary_ip: the ip to check
866
  @type prereq: boolean
867
  @param prereq: whether to throw a prerequisite or an execute error
868
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
870

871
  """
872
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873
  result.Raise("Failure checking secondary ip on node %s" % node,
874
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
875
  if not result.payload:
876
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877
           " please fix and re-run this command" % secondary_ip)
878
    if prereq:
879
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
880
    else:
881
      raise errors.OpExecError(msg)
882

    
883

    
884
def _GetClusterDomainSecret():
885
  """Reads the cluster domain secret.
886

887
  """
888
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
889
                               strict=True)
890

    
891

    
892
def _CheckInstanceDown(lu, instance, reason):
893
  """Ensure that an instance is not running."""
894
  if instance.admin_up:
895
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896
                               (instance.name, reason), errors.ECODE_STATE)
897

    
898
  pnode = instance.primary_node
899
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
901
              prereq=True, ecode=errors.ECODE_ENVIRON)
902

    
903
  if instance.name in ins_l.payload:
904
    raise errors.OpPrereqError("Instance %s is running, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907

    
908
def _ExpandItemName(fn, name, kind):
909
  """Expand an item name.
910

911
  @param fn: the function to use for expansion
912
  @param name: requested item name
913
  @param kind: text description ('Node' or 'Instance')
914
  @return: the resolved (full) name
915
  @raise errors.OpPrereqError: if the item is not found
916

917
  """
918
  full_name = fn(name)
919
  if full_name is None:
920
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
921
                               errors.ECODE_NOENT)
922
  return full_name
923

    
924

    
925
def _ExpandNodeName(cfg, name):
926
  """Wrapper over L{_ExpandItemName} for nodes."""
927
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
928

    
929

    
930
def _ExpandInstanceName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for instance."""
932
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
933

    
934

    
935
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936
                          memory, vcpus, nics, disk_template, disks,
937
                          bep, hvp, hypervisor_name, tags):
938
  """Builds instance related env variables for hooks
939

940
  This builds the hook environment from individual variables.
941

942
  @type name: string
943
  @param name: the name of the instance
944
  @type primary_node: string
945
  @param primary_node: the name of the instance's primary node
946
  @type secondary_nodes: list
947
  @param secondary_nodes: list of secondary nodes as strings
948
  @type os_type: string
949
  @param os_type: the name of the instance's OS
950
  @type status: boolean
951
  @param status: the should_run status of the instance
952
  @type memory: string
953
  @param memory: the memory size of the instance
954
  @type vcpus: string
955
  @param vcpus: the count of VCPUs the instance has
956
  @type nics: list
957
  @param nics: list of tuples (ip, mac, mode, link) representing
958
      the NICs the instance has
959
  @type disk_template: string
960
  @param disk_template: the disk template of the instance
961
  @type disks: list
962
  @param disks: the list of (size, mode) pairs
963
  @type bep: dict
964
  @param bep: the backend parameters for the instance
965
  @type hvp: dict
966
  @param hvp: the hypervisor parameters for the instance
967
  @type hypervisor_name: string
968
  @param hypervisor_name: the hypervisor for the instance
969
  @type tags: list
970
  @param tags: list of instance tags as strings
971
  @rtype: dict
972
  @return: the hook environment for this instance
973

974
  """
975
  if status:
976
    str_status = "up"
977
  else:
978
    str_status = "down"
979
  env = {
980
    "OP_TARGET": name,
981
    "INSTANCE_NAME": name,
982
    "INSTANCE_PRIMARY": primary_node,
983
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984
    "INSTANCE_OS_TYPE": os_type,
985
    "INSTANCE_STATUS": str_status,
986
    "INSTANCE_MEMORY": memory,
987
    "INSTANCE_VCPUS": vcpus,
988
    "INSTANCE_DISK_TEMPLATE": disk_template,
989
    "INSTANCE_HYPERVISOR": hypervisor_name,
990
  }
991

    
992
  if nics:
993
    nic_count = len(nics)
994
    for idx, (ip, mac, mode, link) in enumerate(nics):
995
      if ip is None:
996
        ip = ""
997
      env["INSTANCE_NIC%d_IP" % idx] = ip
998
      env["INSTANCE_NIC%d_MAC" % idx] = mac
999
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1000
      env["INSTANCE_NIC%d_LINK" % idx] = link
1001
      if mode == constants.NIC_MODE_BRIDGED:
1002
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003
  else:
1004
    nic_count = 0
1005

    
1006
  env["INSTANCE_NIC_COUNT"] = nic_count
1007

    
1008
  if disks:
1009
    disk_count = len(disks)
1010
    for idx, (size, mode) in enumerate(disks):
1011
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1012
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1013
  else:
1014
    disk_count = 0
1015

    
1016
  env["INSTANCE_DISK_COUNT"] = disk_count
1017

    
1018
  if not tags:
1019
    tags = []
1020

    
1021
  env["INSTANCE_TAGS"] = " ".join(tags)
1022

    
1023
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024
    for key, value in source.items():
1025
      env["INSTANCE_%s_%s" % (kind, key)] = value
1026

    
1027
  return env
1028

    
1029

    
1030
def _NICListToTuple(lu, nics):
1031
  """Build a list of nic information tuples.
1032

1033
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034
  value in LUInstanceQueryData.
1035

1036
  @type lu:  L{LogicalUnit}
1037
  @param lu: the logical unit on whose behalf we execute
1038
  @type nics: list of L{objects.NIC}
1039
  @param nics: list of nics to convert to hooks tuples
1040

1041
  """
1042
  hooks_nics = []
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  for nic in nics:
1045
    ip = nic.ip
1046
    mac = nic.mac
1047
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048
    mode = filled_params[constants.NIC_MODE]
1049
    link = filled_params[constants.NIC_LINK]
1050
    hooks_nics.append((ip, mac, mode, link))
1051
  return hooks_nics
1052

    
1053

    
1054
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055
  """Builds instance related env variables for hooks from an object.
1056

1057
  @type lu: L{LogicalUnit}
1058
  @param lu: the logical unit on whose behalf we execute
1059
  @type instance: L{objects.Instance}
1060
  @param instance: the instance for which we should build the
1061
      environment
1062
  @type override: dict
1063
  @param override: dictionary with key/values that will override
1064
      our values
1065
  @rtype: dict
1066
  @return: the hook environment dictionary
1067

1068
  """
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  bep = cluster.FillBE(instance)
1071
  hvp = cluster.FillHV(instance)
1072
  args = {
1073
    "name": instance.name,
1074
    "primary_node": instance.primary_node,
1075
    "secondary_nodes": instance.secondary_nodes,
1076
    "os_type": instance.os,
1077
    "status": instance.admin_up,
1078
    "memory": bep[constants.BE_MEMORY],
1079
    "vcpus": bep[constants.BE_VCPUS],
1080
    "nics": _NICListToTuple(lu, instance.nics),
1081
    "disk_template": instance.disk_template,
1082
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083
    "bep": bep,
1084
    "hvp": hvp,
1085
    "hypervisor_name": instance.hypervisor,
1086
    "tags": instance.tags,
1087
  }
1088
  if override:
1089
    args.update(override)
1090
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1091

    
1092

    
1093
def _AdjustCandidatePool(lu, exceptions):
1094
  """Adjust the candidate pool after node operations.
1095

1096
  """
1097
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098
  if mod_list:
1099
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1100
               utils.CommaJoin(node.name for node in mod_list))
1101
    for name in mod_list:
1102
      lu.context.ReaddNode(name)
1103
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104
  if mc_now > mc_max:
1105
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106
               (mc_now, mc_max))
1107

    
1108

    
1109
def _DecideSelfPromotion(lu, exceptions=None):
1110
  """Decide whether I should promote myself as a master candidate.
1111

1112
  """
1113
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115
  # the new node will increase mc_max with one, so:
1116
  mc_should = min(mc_should + 1, cp_size)
1117
  return mc_now < mc_should
1118

    
1119

    
1120
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121
  """Check that the brigdes needed by a list of nics exist.
1122

1123
  """
1124
  cluster = lu.cfg.GetClusterInfo()
1125
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126
  brlist = [params[constants.NIC_LINK] for params in paramslist
1127
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128
  if brlist:
1129
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1130
    result.Raise("Error checking bridges on destination node '%s'" %
1131
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132

    
1133

    
1134
def _CheckInstanceBridgesExist(lu, instance, node=None):
1135
  """Check that the brigdes needed by an instance exist.
1136

1137
  """
1138
  if node is None:
1139
    node = instance.primary_node
1140
  _CheckNicsBridgesExist(lu, instance.nics, node)
1141

    
1142

    
1143
def _CheckOSVariant(os_obj, name):
1144
  """Check whether an OS name conforms to the os variants specification.
1145

1146
  @type os_obj: L{objects.OS}
1147
  @param os_obj: OS object to check
1148
  @type name: string
1149
  @param name: OS name passed by the user, to check for validity
1150

1151
  """
1152
  variant = objects.OS.GetVariant(name)
1153
  if not os_obj.supported_variants:
1154
    if variant:
1155
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156
                                 " passed)" % (os_obj.name, variant),
1157
                                 errors.ECODE_INVAL)
1158
    return
1159
  if not variant:
1160
    raise errors.OpPrereqError("OS name must include a variant",
1161
                               errors.ECODE_INVAL)
1162

    
1163
  if variant not in os_obj.supported_variants:
1164
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165

    
1166

    
1167
def _GetNodeInstancesInner(cfg, fn):
1168
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169

    
1170

    
1171
def _GetNodeInstances(cfg, node_name):
1172
  """Returns a list of all primary and secondary instances on a node.
1173

1174
  """
1175

    
1176
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177

    
1178

    
1179
def _GetNodePrimaryInstances(cfg, node_name):
1180
  """Returns primary instances on a node.
1181

1182
  """
1183
  return _GetNodeInstancesInner(cfg,
1184
                                lambda inst: node_name == inst.primary_node)
1185

    
1186

    
1187
def _GetNodeSecondaryInstances(cfg, node_name):
1188
  """Returns secondary instances on a node.
1189

1190
  """
1191
  return _GetNodeInstancesInner(cfg,
1192
                                lambda inst: node_name in inst.secondary_nodes)
1193

    
1194

    
1195
def _GetStorageTypeArgs(cfg, storage_type):
1196
  """Returns the arguments for a storage type.
1197

1198
  """
1199
  # Special case for file storage
1200
  if storage_type == constants.ST_FILE:
1201
    # storage.FileStorage wants a list of storage directories
1202
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203

    
1204
  return []
1205

    
1206

    
1207
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208
  faulty = []
1209

    
1210
  for dev in instance.disks:
1211
    cfg.SetDiskID(dev, node_name)
1212

    
1213
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214
  result.Raise("Failed to get disk status from node %s" % node_name,
1215
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216

    
1217
  for idx, bdev_status in enumerate(result.payload):
1218
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219
      faulty.append(idx)
1220

    
1221
  return faulty
1222

    
1223

    
1224
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225
  """Check the sanity of iallocator and node arguments and use the
1226
  cluster-wide iallocator if appropriate.
1227

1228
  Check that at most one of (iallocator, node) is specified. If none is
1229
  specified, then the LU's opcode's iallocator slot is filled with the
1230
  cluster-wide default iallocator.
1231

1232
  @type iallocator_slot: string
1233
  @param iallocator_slot: the name of the opcode iallocator slot
1234
  @type node_slot: string
1235
  @param node_slot: the name of the opcode target node slot
1236

1237
  """
1238
  node = getattr(lu.op, node_slot, None)
1239
  iallocator = getattr(lu.op, iallocator_slot, None)
1240

    
1241
  if node is not None and iallocator is not None:
1242
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243
                               errors.ECODE_INVAL)
1244
  elif node is None and iallocator is None:
1245
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1246
    if default_iallocator:
1247
      setattr(lu.op, iallocator_slot, default_iallocator)
1248
    else:
1249
      raise errors.OpPrereqError("No iallocator or node given and no"
1250
                                 " cluster-wide default iallocator found;"
1251
                                 " please specify either an iallocator or a"
1252
                                 " node, or set a cluster-wide default"
1253
                                 " iallocator")
1254

    
1255

    
1256
def _GetDefaultIAllocator(cfg, iallocator):
1257
  """Decides on which iallocator to use.
1258

1259
  @type cfg: L{config.ConfigWriter}
1260
  @param cfg: Cluster configuration object
1261
  @type iallocator: string or None
1262
  @param iallocator: Iallocator specified in opcode
1263
  @rtype: string
1264
  @return: Iallocator name
1265

1266
  """
1267
  if not iallocator:
1268
    # Use default iallocator
1269
    iallocator = cfg.GetDefaultIAllocator()
1270

    
1271
  if not iallocator:
1272
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273
                               " opcode nor as a cluster-wide default",
1274
                               errors.ECODE_INVAL)
1275

    
1276
  return iallocator
1277

    
1278

    
1279
class LUClusterPostInit(LogicalUnit):
1280
  """Logical unit for running hooks after cluster initialization.
1281

1282
  """
1283
  HPATH = "cluster-init"
1284
  HTYPE = constants.HTYPE_CLUSTER
1285

    
1286
  def BuildHooksEnv(self):
1287
    """Build hooks env.
1288

1289
    """
1290
    return {
1291
      "OP_TARGET": self.cfg.GetClusterName(),
1292
      }
1293

    
1294
  def BuildHooksNodes(self):
1295
    """Build hooks nodes.
1296

1297
    """
1298
    return ([], [self.cfg.GetMasterNode()])
1299

    
1300
  def Exec(self, feedback_fn):
1301
    """Nothing to do.
1302

1303
    """
1304
    return True
1305

    
1306

    
1307
class LUClusterDestroy(LogicalUnit):
1308
  """Logical unit for destroying the cluster.
1309

1310
  """
1311
  HPATH = "cluster-destroy"
1312
  HTYPE = constants.HTYPE_CLUSTER
1313

    
1314
  def BuildHooksEnv(self):
1315
    """Build hooks env.
1316

1317
    """
1318
    return {
1319
      "OP_TARGET": self.cfg.GetClusterName(),
1320
      }
1321

    
1322
  def BuildHooksNodes(self):
1323
    """Build hooks nodes.
1324

1325
    """
1326
    return ([], [])
1327

    
1328
  def CheckPrereq(self):
1329
    """Check prerequisites.
1330

1331
    This checks whether the cluster is empty.
1332

1333
    Any errors are signaled by raising errors.OpPrereqError.
1334

1335
    """
1336
    master = self.cfg.GetMasterNode()
1337

    
1338
    nodelist = self.cfg.GetNodeList()
1339
    if len(nodelist) != 1 or nodelist[0] != master:
1340
      raise errors.OpPrereqError("There are still %d node(s) in"
1341
                                 " this cluster." % (len(nodelist) - 1),
1342
                                 errors.ECODE_INVAL)
1343
    instancelist = self.cfg.GetInstanceList()
1344
    if instancelist:
1345
      raise errors.OpPrereqError("There are still %d instance(s) in"
1346
                                 " this cluster." % len(instancelist),
1347
                                 errors.ECODE_INVAL)
1348

    
1349
  def Exec(self, feedback_fn):
1350
    """Destroys the cluster.
1351

1352
    """
1353
    master = self.cfg.GetMasterNode()
1354

    
1355
    # Run post hooks on master node before it's removed
1356
    _RunPostHook(self, master)
1357

    
1358
    result = self.rpc.call_node_stop_master(master, False)
1359
    result.Raise("Could not disable the master role")
1360

    
1361
    return master
1362

    
1363

    
1364
def _VerifyCertificate(filename):
1365
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1366

1367
  @type filename: string
1368
  @param filename: Path to PEM file
1369

1370
  """
1371
  try:
1372
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373
                                           utils.ReadFile(filename))
1374
  except Exception, err: # pylint: disable-msg=W0703
1375
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1376
            "Failed to load X509 certificate %s: %s" % (filename, err))
1377

    
1378
  (errcode, msg) = \
1379
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380
                                constants.SSL_CERT_EXPIRATION_ERROR)
1381

    
1382
  if msg:
1383
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1384
  else:
1385
    fnamemsg = None
1386

    
1387
  if errcode is None:
1388
    return (None, fnamemsg)
1389
  elif errcode == utils.CERT_WARNING:
1390
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391
  elif errcode == utils.CERT_ERROR:
1392
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393

    
1394
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395

    
1396

    
1397
def _GetAllHypervisorParameters(cluster, instances):
1398
  """Compute the set of all hypervisor parameters.
1399

1400
  @type cluster: L{objects.Cluster}
1401
  @param cluster: the cluster object
1402
  @param instances: list of L{objects.Instance}
1403
  @param instances: additional instances from which to obtain parameters
1404
  @rtype: list of (origin, hypervisor, parameters)
1405
  @return: a list with all parameters found, indicating the hypervisor they
1406
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407

1408
  """
1409
  hvp_data = []
1410

    
1411
  for hv_name in cluster.enabled_hypervisors:
1412
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413

    
1414
  for os_name, os_hvp in cluster.os_hvp.items():
1415
    for hv_name, hv_params in os_hvp.items():
1416
      if hv_params:
1417
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419

    
1420
  # TODO: collapse identical parameter values in a single one
1421
  for instance in instances:
1422
    if instance.hvparams:
1423
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424
                       cluster.FillHV(instance)))
1425

    
1426
  return hvp_data
1427

    
1428

    
1429
class _VerifyErrors(object):
1430
  """Mix-in for cluster/group verify LUs.
1431

1432
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433
  self.op and self._feedback_fn to be available.)
1434

1435
  """
1436
  TCLUSTER = "cluster"
1437
  TNODE = "node"
1438
  TINSTANCE = "instance"
1439

    
1440
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452
  ENODEDRBD = (TNODE, "ENODEDRBD")
1453
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456
  ENODEHV = (TNODE, "ENODEHV")
1457
  ENODELVM = (TNODE, "ENODELVM")
1458
  ENODEN1 = (TNODE, "ENODEN1")
1459
  ENODENET = (TNODE, "ENODENET")
1460
  ENODEOS = (TNODE, "ENODEOS")
1461
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463
  ENODERPC = (TNODE, "ENODERPC")
1464
  ENODESSH = (TNODE, "ENODESSH")
1465
  ENODEVERSION = (TNODE, "ENODEVERSION")
1466
  ENODESETUP = (TNODE, "ENODESETUP")
1467
  ENODETIME = (TNODE, "ENODETIME")
1468
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469

    
1470
  ETYPE_FIELD = "code"
1471
  ETYPE_ERROR = "ERROR"
1472
  ETYPE_WARNING = "WARNING"
1473

    
1474
  def _Error(self, ecode, item, msg, *args, **kwargs):
1475
    """Format an error message.
1476

1477
    Based on the opcode's error_codes parameter, either format a
1478
    parseable error code, or a simpler error string.
1479

1480
    This must be called only from Exec and functions called from Exec.
1481

1482
    """
1483
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484
    itype, etxt = ecode
1485
    # first complete the msg
1486
    if args:
1487
      msg = msg % args
1488
    # then format the whole message
1489
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1490
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491
    else:
1492
      if item:
1493
        item = " " + item
1494
      else:
1495
        item = ""
1496
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497
    # and finally report it via the feedback_fn
1498
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1499

    
1500
  def _ErrorIf(self, cond, *args, **kwargs):
1501
    """Log an error message if the passed condition is True.
1502

1503
    """
1504
    cond = (bool(cond)
1505
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1506
    if cond:
1507
      self._Error(*args, **kwargs)
1508
    # do not mark the operation as failed for WARN cases only
1509
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510
      self.bad = self.bad or cond
1511

    
1512

    
1513
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1514
  """Verifies the cluster config.
1515

1516
  """
1517
  REQ_BGL = True
1518

    
1519
  def _VerifyHVP(self, hvp_data):
1520
    """Verifies locally the syntax of the hypervisor parameters.
1521

1522
    """
1523
    for item, hv_name, hv_params in hvp_data:
1524
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1525
             (item, hv_name))
1526
      try:
1527
        hv_class = hypervisor.GetHypervisor(hv_name)
1528
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1529
        hv_class.CheckParameterSyntax(hv_params)
1530
      except errors.GenericError, err:
1531
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return (not self.bad, [g.name for g in self.all_group_info.values()])
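
  # Illustrative note: the Exec result is a (success, group_names) pair, e.g.
  # (True, ["default", "rack1"]) for a clean two-group cluster (group names
  # here are examples only); presumably callers can use the returned names to
  # drive per-group verification via LUClusterVerifyGroup below.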


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
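
  # Illustrative note (hypothetical version numbers): nresult["version"] is
  # expected to be a two-element (protocol_version, release_version) pair; a
  # master at (40, "2.7.1") and a node returning (40, "2.7.0") passes the
  # protocol check but triggers the ETYPE_WARNING software-version mismatch,
  # while a node returning (30, ...) fails with ENODEVERSION and this per-node
  # validation stops early (return False).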

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
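
  # Illustrative example (hypothetical numbers, assuming a
  # constants.NODE_MAX_CLOCK_SKEW of 150 seconds): with an RPC window of
  # nvinfo_starttime=1000.0 .. nvinfo_endtime=1002.0, a node clock of 1200.0
  # exceeds nvinfo_endtime + 150, so ntime_diff is "198.0s" and ENODETIME is
  # reported; any node clock within [850.0, 1152.0] is accepted.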

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
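
  # Illustrative note on the result shapes assumed above: NV_NODELIST and
  # NV_NODENETTEST are dicts mapping a peer node name to an error message
  # (an empty dict means every peer was reachable over ssh/tcp respectively),
  # and NV_MASTERIP is a boolean reachability flag; e.g. a hypothetical
  # NV_NODELIST of {"node2.example.com": "ssh: connection refused"} would be
  # reported as an ENODESSH error for this node.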

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
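
  # Illustrative example (hypothetical sizes): if n_img.sbp is
  # {"nodeA": ["inst1", "inst2"]} and the filled BE parameters give 1024 MiB
  # and 2048 MiB with auto_balance enabled, needed_mem is 3072; a node
  # reporting mfree of 2048 therefore gets an ENODEN1 error for primary node
  # "nodeA", while instances with auto_balance disabled are not counted.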

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo if not node.offline)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      if node.offline:
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
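
  # Illustrative note: fileinfo ends up keyed by filename and then by
  # checksum, e.g. (hypothetical data)
  #   {"/var/lib/ganeti/known_hosts": {"0123abcdef...": set(["node1", "node2"])}}
  # so a single checksum entry covering all non-offline nodes is consistent,
  # while two checksum keys produce the "different checksums"
  # ECLUSTERFILECHECK error with one "variant ... on ..." entry per checksum.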

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
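
  # Illustrative note: each NV_OSLIST entry is a 7-element list
  # [name, os_path, status, diagnose, variants, parameters, api_versions],
  # e.g. (hypothetical) ["debootstrap", "/srv/ganeti/os/debootstrap", True,
  # "", ["default"], [["param1", "doc"]], [20]]; entries are grouped by OS
  # name into nimg.oslist, with variants/parameters/api versions converted to
  # sets for the comparisons done in _VerifyNodeOS.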

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
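
  # Illustrative note: the returned instdisk mapping has the shape
  # {instance: {node: [(success, payload), ...]}}, e.g. (hypothetical)
  # {"inst1": {"node1": [(True, status0), (True, status1)]}} for a two-disk
  # instance; diskless instances appear with an empty inner dict, which is
  # what the trailing consistency asserts above rely on.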

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; a hook failure is
    logged in the verify output and makes the verification fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)
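
  # Illustrative note: the hooks environment built above contains the cluster
  # tags plus one NODE_TAGS_<name> entry per node of the verified group, e.g.
  # (hypothetical) {"CLUSTER_TAGS": "prod", "NODE_TAGS_node1": "rack1 ssd"};
  # BuildHooksNodes requests the post-phase hooks on all nodes of the group
  # and none in the pre phase.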

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)
2781

    
2782
    for node, result in extra_lv_nvinfo.items():
2783
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2784
                              node_image[node], vg_name)
2785

    
2786
    feedback_fn("* Verifying instance status")
2787
    for instance in self.my_inst_names:
2788
      if verbose:
2789
        feedback_fn("* Verifying instance %s" % instance)
2790
      inst_config = self.my_inst_info[instance]
2791
      self._VerifyInstance(instance, inst_config, node_image,
2792
                           instdisk[instance])
2793
      inst_nodes_offline = []
2794

    
2795
      pnode = inst_config.primary_node
2796
      pnode_img = node_image[pnode]
2797
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2798
               self.ENODERPC, pnode, "instance %s, connection to"
2799
               " primary node failed", instance)
2800

    
2801
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2802
               self.EINSTANCEBADNODE, instance,
2803
               "instance is marked as running and lives on offline node %s",
2804
               inst_config.primary_node)
2805

    
2806
      # If the instance is non-redundant we cannot survive losing its primary
2807
      # node, so we are not N+1 compliant. On the other hand we have no disk
2808
      # templates with more than one secondary so that situation is not well
2809
      # supported either.
2810
      # FIXME: does not support file-backed instances
2811
      if not inst_config.secondary_nodes:
2812
        i_non_redundant.append(instance)
2813

    
2814
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2815
               instance, "instance has multiple secondary nodes: %s",
2816
               utils.CommaJoin(inst_config.secondary_nodes),
2817
               code=self.ETYPE_WARNING)
2818

    
2819
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2820
        pnode = inst_config.primary_node
2821
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2822
        instance_groups = {}
2823

    
2824
        for node in instance_nodes:
2825
          instance_groups.setdefault(self.all_node_info[node].group,
2826
                                     []).append(node)
2827

    
2828
        pretty_list = [
2829
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2830
          # Sort so that we always list the primary node first.
2831
          for group, nodes in sorted(instance_groups.items(),
2832
                                     key=lambda (_, nodes): pnode in nodes,
2833
                                     reverse=True)]
2834

    
2835
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2836
                      instance, "instance has primary and secondary nodes in"
2837
                      " different groups: %s", utils.CommaJoin(pretty_list),
2838
                      code=self.ETYPE_WARNING)
2839

    
2840
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2841
        i_non_a_balanced.append(instance)
2842

    
2843
      for snode in inst_config.secondary_nodes:
2844
        s_img = node_image[snode]
2845
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2846
                 "instance %s, connection to secondary node failed", instance)
2847

    
2848
        if s_img.offline:
2849
          inst_nodes_offline.append(snode)
2850

    
2851
      # warn that the instance lives on offline nodes
2852
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2853
               "instance has offline secondary node(s) %s",
2854
               utils.CommaJoin(inst_nodes_offline))
2855
      # ... or ghost/non-vm_capable nodes
2856
      for node in inst_config.all_nodes:
2857
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2858
                 "instance lives on ghost node %s", node)
2859
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2860
                 instance, "instance lives on non-vm_capable node %s", node)
2861

    
2862
    feedback_fn("* Verifying orphan volumes")
2863
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2864

    
2865
    # We will get spurious "unknown volume" warnings if any node of this group
2866
    # is secondary for an instance whose primary is in another group. To avoid
2867
    # them, we find these instances and add their volumes to node_vol_should.
2868
    for inst in self.all_inst_info.values():
2869
      for secondary in inst.secondary_nodes:
2870
        if (secondary in self.my_node_info
2871
            and inst.name not in self.my_inst_info):
2872
          inst.MapLVsByNode(node_vol_should)
2873
          break
2874

    
2875
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2876

    
2877
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2878
      feedback_fn("* Verifying N+1 Memory redundancy")
2879
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2880

    
2881
    feedback_fn("* Other Notes")
2882
    if i_non_redundant:
2883
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2884
                  % len(i_non_redundant))
2885

    
2886
    if i_non_a_balanced:
2887
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2888
                  % len(i_non_a_balanced))
2889

    
2890
    if n_offline:
2891
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2892

    
2893
    if n_drained:
2894
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2895

    
2896
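    # self.bad has been set by the _ErrorIf calls above whenever a real
    # problem was reported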
    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2899
    """Analyze the post-hooks' result
2900

2901
    This method analyses the hook result, handles it, and sends some
2902
    nicely-formatted feedback back to the user.
2903

2904
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2905
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2906
    @param hooks_results: the results of the multi-node hooks rpc call
2907
    @param feedback_fn: function used to send feedback back to the caller
2908
    @param lu_result: previous Exec result
2909
    @return: the new Exec result, based on the previous result
2910
        and hook results
2911

2912
    """
2913
    # We only really run POST phase hooks, and only for non-empty groups;
    # we are only interested in their results
2915
    if not self.my_node_names:
2916
      # empty node group
2917
      pass
2918
    elif phase == constants.HOOKS_PHASE_POST:
2919
      # Used to change hooks' output to proper indentation
2920
      feedback_fn("* Hooks Results")
2921
      assert hooks_results, "invalid result from hooks"
2922

    
2923
      for node_name in hooks_results:
2924
        res = hooks_results[node_name]
2925
        msg = res.fail_msg
2926
        test = msg and not res.offline
2927
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2928
                      "Communication failure in hooks execution: %s", msg)
2929
        if res.offline or msg:
2930
          # No need to investigate payload if node is offline or gave an error.
2931
          # override manually lu_result here as _ErrorIf only
2932
          # overrides self.bad
2933
          lu_result = 1
2934
          continue
2935
        for script, hkr, output in res.payload:
2936
          test = hkr == constants.HKR_FAIL
2937
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2938
                        "Script %s failed, output:", script)
2939
          if test:
2940
            output = self._HOOKS_INDENT_RE.sub("      ", output)
2941
            feedback_fn("%s" % output)
2942
            lu_result = 0
2943

    
2944
    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
2965

    
2966

    
2967
class LUGroupVerifyDisks(NoHooksLU):
2968
  """Verifies the status of all disks in a node group.
2969

2970
  """
2971
  REQ_BGL = False
2972

    
2973
  def ExpandNames(self):
2974
    # Raises errors.OpPrereqError on its own if group can't be found
2975
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2976

    
2977
    self.share_locks = _ShareAll()
2978
    self.needed_locks = {
2979
      locking.LEVEL_INSTANCE: [],
2980
      locking.LEVEL_NODEGROUP: [],
2981
      locking.LEVEL_NODE: [],
2982
      }
2983

    
2984
  def DeclareLocks(self, level):
2985
    if level == locking.LEVEL_INSTANCE:
2986
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
2987

    
2988
      # Lock instances optimistically, needs verification once node and group
2989
      # locks have been acquired
2990
      self.needed_locks[locking.LEVEL_INSTANCE] = \
2991
        self.cfg.GetNodeGroupInstances(self.group_uuid)
2992

    
2993
    elif level == locking.LEVEL_NODEGROUP:
2994
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2995

    
2996
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
2997
        set([self.group_uuid] +
2998
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, so the result needs to be
            # verified later on
3001
            [group_uuid
3002
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3003
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3004

    
3005
    elif level == locking.LEVEL_NODE:
3006
      # This will only lock the nodes in the group to be verified which contain
3007
      # actual instances
3008
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3009
      self._LockInstancesNodes()
3010

    
3011
      # Lock all nodes in group to be verified
3012
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3013
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3014
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3015

    
3016
  def CheckPrereq(self):
3017
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3018
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3019
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3020

    
3021
    assert self.group_uuid in owned_groups
3022

    
3023
    # Check if locked instances are still correct
3024
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3025

    
3026
    # Get instance information
3027
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3028

    
3029
    # Check if node groups for locked instances are still correct
3030
    for (instance_name, inst) in self.instances.items():
3031
      assert owned_nodes.issuperset(inst.all_nodes), \
3032
        "Instance %s's nodes changed while we kept the lock" % instance_name
3033

    
3034
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3035
                                             owned_groups)
3036

    
3037
      assert self.group_uuid in inst_groups, \
3038
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3039

    
3040
  def Exec(self, feedback_fn):
3041
    """Verify integrity of cluster disks.
3042

3043
    @rtype: tuple of three items
3044
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
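
    Example result (illustrative values only, with hypothetical names)::

      ({"node1.example.com": "error message"},
       ["instance1.example.com"],
       {"instance2.example.com": [("node3.example.com", "xenvg/disk0_data")]})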
    """
3049
    res_nodes = {}
3050
    res_instances = set()
3051
    res_missing = {}
3052

    
3053
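    # map each (node, volume) pair to the instance owning it, restricted to
    # instances that are supposed to be up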
    nv_dict = _MapInstanceDisksToNodes([inst
3054
                                        for inst in self.instances.values()
3055
                                        if inst.admin_up])
3056

    
3057
    if nv_dict:
3058
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3059
                             set(self.cfg.GetVmCapableNodeList()))
3060

    
3061
      node_lvs = self.rpc.call_lv_list(nodes, [])
3062

    
3063
      for (node, node_res) in node_lvs.items():
3064
        if node_res.offline:
3065
          continue
3066

    
3067
        msg = node_res.fail_msg
3068
        if msg:
3069
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3070
          res_nodes[node] = msg
3071
          continue
3072

    
3073
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3074
          inst = nv_dict.pop((node, lv_name), None)
3075
          if not (lv_online or inst is None):
3076
            res_instances.add(inst)
3077

    
3078
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3079
      # better
3080
      for key, inst in nv_dict.iteritems():
3081
        res_missing.setdefault(inst, []).append(key)
3082

    
3083
    return (res_nodes, list(res_instances), res_missing)
3084

    
3085

    
3086
class LUClusterRepairDiskSizes(NoHooksLU):
3087
  """Verifies the cluster disks sizes.
3088

3089
  """
3090
  REQ_BGL = False
3091

    
3092
  def ExpandNames(self):
3093
    if self.op.instances:
3094
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3095
      self.needed_locks = {
3096
        locking.LEVEL_NODE: [],
3097
        locking.LEVEL_INSTANCE: self.wanted_names,
3098
        }
3099
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3100
    else:
3101
      self.wanted_names = None
3102
      self.needed_locks = {
3103
        locking.LEVEL_NODE: locking.ALL_SET,
3104
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3105
        }
3106
    self.share_locks = _ShareAll()
3107

    
3108
  def DeclareLocks(self, level):
3109
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3110
      self._LockInstancesNodes(primary_only=True)
3111

    
3112
  def CheckPrereq(self):
3113
    """Check prerequisites.
3114

3115
    This only checks the optional instance list against the existing names.
3116

3117
    """
3118
    if self.wanted_names is None:
3119
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3120

    
3121
    self.wanted_instances = \
3122
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3123

    
3124
  def _EnsureChildSizes(self, disk):
3125
    """Ensure children of the disk have the needed disk size.
3126

3127
    This is valid mainly for DRBD8 and fixes an issue where the
3128
    children have smaller disk size.
3129

3130
    @param disk: an L{ganeti.objects.Disk} object
3131

3132
    """
3133
    if disk.dev_type == constants.LD_DRBD8:
3134
      assert disk.children, "Empty children for DRBD8?"
3135
      fchild = disk.children[0]
3136
      mismatch = fchild.size < disk.size
3137
      if mismatch:
3138
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3139
                     fchild.size, disk.size)
3140
        fchild.size = disk.size
3141

    
3142
      # and we recurse on this child only, not on the metadev
3143
      return self._EnsureChildSizes(fchild) or mismatch
3144
    else:
3145
      return False
3146

    
3147
  def Exec(self, feedback_fn):
3148
    """Verify the size of cluster disks.
3149

3150
    """
3151
    # TODO: check child disks too
3152
    # TODO: check differences in size between primary/secondary nodes
3153
    per_node_disks = {}
3154
    for instance in self.wanted_instances:
3155
      pnode = instance.primary_node
3156
      if pnode not in per_node_disks:
3157
        per_node_disks[pnode] = []
3158
      for idx, disk in enumerate(instance.disks):
3159
        per_node_disks[pnode].append((instance, idx, disk))
3160

    
3161
    changed = []
3162
    for node, dskl in per_node_disks.items():
3163
      newl = [v[2].Copy() for v in dskl]
3164
      for dsk in newl:
3165
        self.cfg.SetDiskID(dsk, node)
3166
      result = self.rpc.call_blockdev_getsize(node, newl)
3167
      if result.fail_msg:
3168
        self.LogWarning("Failure in blockdev_getsize call to node"
3169
                        " %s, ignoring", node)
3170
        continue
3171
      if len(result.payload) != len(dskl):
3172
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
3173
                        " result.payload=%s", node, len(dskl), result.payload)
3174
        self.LogWarning("Invalid result from node %s, ignoring node results",
3175
                        node)
3176
        continue
3177
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3178
        if size is None:
3179
          self.LogWarning("Disk %d of instance %s did not return size"
3180
                          " information, ignoring", idx, instance.name)
3181
          continue
3182
        if not isinstance(size, (int, long)):
3183
          self.LogWarning("Disk %d of instance %s did not return valid"
3184
                          " size information, ignoring", idx, instance.name)
3185
          continue
3186
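        # the reported size is in bytes; the configuration stores sizes in MiB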
        size = size >> 20
3187
        if size != disk.size:
3188
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3189
                       " correcting: recorded %d, actual %d", idx,
3190
                       instance.name, disk.size, size)
3191
          disk.size = size
3192
          self.cfg.Update(instance, feedback_fn)
3193
          changed.append((instance.name, idx, size))
3194
        if self._EnsureChildSizes(disk):
3195
          self.cfg.Update(instance, feedback_fn)
3196
          changed.append((instance.name, idx, disk.size))
3197
    return changed
3198

    
3199

    
3200
class LUClusterRename(LogicalUnit):
3201
  """Rename the cluster.
3202

3203
  """
3204
  HPATH = "cluster-rename"
3205
  HTYPE = constants.HTYPE_CLUSTER
3206

    
3207
  def BuildHooksEnv(self):
3208
    """Build hooks env.
3209

3210
    """
3211
    return {
3212
      "OP_TARGET": self.cfg.GetClusterName(),
3213
      "NEW_NAME": self.op.name,
3214
      }
3215

    
3216
  def BuildHooksNodes(self):
3217
    """Build hooks nodes.
3218

3219
    """
3220
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3221

    
3222
  def CheckPrereq(self):
3223
    """Verify that the passed name is a valid one.
3224

3225
    """
3226
    hostname = netutils.GetHostname(name=self.op.name,
3227
                                    family=self.cfg.GetPrimaryIPFamily())
3228

    
3229
    new_name = hostname.name
3230
    self.ip = new_ip = hostname.ip
3231
    old_name = self.cfg.GetClusterName()
3232
    old_ip = self.cfg.GetMasterIP()
3233
    if new_name == old_name and new_ip == old_ip:
3234
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3235
                                 " cluster has changed",
3236
                                 errors.ECODE_INVAL)
3237
    if new_ip != old_ip:
3238
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3239
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3240
                                   " reachable on the network" %
3241
                                   new_ip, errors.ECODE_NOTUNIQUE)
3242

    
3243
    self.op.name = new_name
3244

    
3245
  def Exec(self, feedback_fn):
3246
    """Rename the cluster.
3247

3248
    """
3249
    clustername = self.op.name
3250
    ip = self.ip
3251

    
3252
    # shutdown the master IP
3253
    master = self.cfg.GetMasterNode()
3254
    result = self.rpc.call_node_stop_master(master, False)
3255
    result.Raise("Could not disable the master role")
3256

    
3257
    try:
3258
      cluster = self.cfg.GetClusterInfo()
3259
      cluster.cluster_name = clustername
3260
      cluster.master_ip = ip
3261
      self.cfg.Update(cluster, feedback_fn)
3262

    
3263
      # update the known hosts file
3264
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3265
      node_list = self.cfg.GetOnlineNodeList()
3266
      try:
3267
        node_list.remove(master)
3268
      except ValueError:
3269
        pass
3270
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3271
    finally:
3272
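      # always try to restart the master role, even if the rename failed
      # half-way through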
      result = self.rpc.call_node_start_master(master, False, False)
3273
      msg = result.fail_msg
3274
      if msg:
3275
        self.LogWarning("Could not re-enable the master role on"
3276
                        " the master, please restart manually: %s", msg)
3277

    
3278
    return clustername
3279

    
3280

    
3281
class LUClusterSetParams(LogicalUnit):
3282
  """Change the parameters of the cluster.
3283

3284
  """
3285
  HPATH = "cluster-modify"
3286
  HTYPE = constants.HTYPE_CLUSTER
3287
  REQ_BGL = False
3288

    
3289
  def CheckArguments(self):
3290
    """Check parameters
3291

3292
    """
3293
    if self.op.uid_pool:
3294
      uidpool.CheckUidPool(self.op.uid_pool)
3295

    
3296
    if self.op.add_uids:
3297
      uidpool.CheckUidPool(self.op.add_uids)
3298

    
3299
    if self.op.remove_uids:
3300
      uidpool.CheckUidPool(self.op.remove_uids)
3301

    
3302
  def ExpandNames(self):
3303
    # FIXME: in the future maybe other cluster params won't require checking on
3304
    # all nodes to be modified.
3305
    self.needed_locks = {
3306
      locking.LEVEL_NODE: locking.ALL_SET,
3307
    }
3308
    self.share_locks[locking.LEVEL_NODE] = 1
3309

    
3310
  def BuildHooksEnv(self):
3311
    """Build hooks env.
3312

3313
    """
3314
    return {
3315
      "OP_TARGET": self.cfg.GetClusterName(),
3316
      "NEW_VG_NAME": self.op.vg_name,
3317
      }
3318

    
3319
  def BuildHooksNodes(self):
3320
    """Build hooks nodes.
3321

3322
    """
3323
    mn = self.cfg.GetMasterNode()
3324
    return ([mn], [mn])
3325

    
3326
  def CheckPrereq(self):
3327
    """Check prerequisites.
3328

3329
    This checks whether the given params don't conflict and
3330
    if the given volume group is valid.
3331

3332
    """
3333
    if self.op.vg_name is not None and not self.op.vg_name:
3334
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3335
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3336
                                   " instances exist", errors.ECODE_INVAL)
3337

    
3338
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3339
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3340
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3341
                                   " drbd-based instances exist",
3342
                                   errors.ECODE_INVAL)
3343

    
3344
    node_list = self.owned_locks(locking.LEVEL_NODE)
3345

    
3346
    # if vg_name not None, checks given volume group on all nodes
3347
    if self.op.vg_name:
3348
      vglist = self.rpc.call_vg_list(node_list)
3349
      for node in node_list:
3350
        msg = vglist[node].fail_msg
3351
        if msg:
3352
          # ignoring down node
3353
          self.LogWarning("Error while gathering data on node %s"
3354
                          " (ignoring node): %s", node, msg)
3355
          continue
3356
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3357
                                              self.op.vg_name,
3358
                                              constants.MIN_VG_SIZE)
3359
        if vgstatus:
3360
          raise errors.OpPrereqError("Error on node '%s': %s" %
3361
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3362

    
3363
    if self.op.drbd_helper:
3364
      # checks given drbd helper on all nodes
3365
      helpers = self.rpc.call_drbd_helper(node_list)
3366
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3367
        if ninfo.offline:
3368
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3369
          continue
3370
        msg = helpers[node].fail_msg
3371
        if msg:
3372
          raise errors.OpPrereqError("Error checking drbd helper on node"
3373
                                     " '%s': %s" % (node, msg),
3374
                                     errors.ECODE_ENVIRON)
3375
        node_helper = helpers[node].payload
3376
        if node_helper != self.op.drbd_helper:
3377
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3378
                                     (node, node_helper), errors.ECODE_ENVIRON)
3379

    
3380
    self.cluster = cluster = self.cfg.GetClusterInfo()
3381
    # validate params changes
3382
    if self.op.beparams:
3383
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3384
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3385

    
3386
    if self.op.ndparams:
3387
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3388
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3389

    
3390
      # TODO: we need a more general way to handle resetting
3391
      # cluster-level parameters to default values
3392
      if self.new_ndparams["oob_program"] == "":
3393
        self.new_ndparams["oob_program"] = \
3394
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3395

    
3396
    if self.op.nicparams:
3397
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3398
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3399
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3400
      nic_errors = []
3401

    
3402
      # check all instances for consistency
3403
      for instance in self.cfg.GetAllInstancesInfo().values():
3404
        for nic_idx, nic in enumerate(instance.nics):
3405
          params_copy = copy.deepcopy(nic.nicparams)
3406
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3407

    
3408
          # check parameter syntax
3409
          try:
3410
            objects.NIC.CheckParameterSyntax(params_filled)
3411
          except errors.ConfigurationError, err:
3412
            nic_errors.append("Instance %s, nic/%d: %s" %
3413
                              (instance.name, nic_idx, err))
3414

    
3415
          # if we're moving instances to routed, check that they have an ip
3416
          target_mode = params_filled[constants.NIC_MODE]
3417
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3418
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3419
                              " address" % (instance.name, nic_idx))
3420
      if nic_errors:
3421
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3422
                                   "\n".join(nic_errors))
3423

    
3424
    # hypervisor list/parameters
3425
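    # work on a private copy so the cluster's current hvparams are not
    # modified in place before the final cfg.Update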
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3426
    if self.op.hvparams:
3427
      for hv_name, hv_dict in self.op.hvparams.items():
3428
        if hv_name not in self.new_hvparams:
3429
          self.new_hvparams[hv_name] = hv_dict
3430
        else:
3431
          self.new_hvparams[hv_name].update(hv_dict)
3432

    
3433
    # os hypervisor parameters
3434
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3435
    if self.op.os_hvp:
3436
      for os_name, hvs in self.op.os_hvp.items():
3437
        if os_name not in self.new_os_hvp:
3438
          self.new_os_hvp[os_name] = hvs
3439
        else:
3440
          for hv_name, hv_dict in hvs.items():
3441
            if hv_name not in self.new_os_hvp[os_name]:
3442
              self.new_os_hvp[os_name][hv_name] = hv_dict
3443
            else:
3444
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3445

    
3446
    # os parameters
3447
    self.new_osp = objects.FillDict(cluster.osparams, {})
3448
    if self.op.osparams:
3449
      for os_name, osp in self.op.osparams.items():
3450
        if os_name not in self.new_osp:
3451
          self.new_osp[os_name] = {}
3452

    
3453
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3454
                                                  use_none=True)
3455

    
3456
        if not self.new_osp[os_name]:
3457
          # we removed all parameters
3458
          del self.new_osp[os_name]
3459
        else:
3460
          # check the parameter validity (remote check)
3461
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3462
                         os_name, self.new_osp[os_name])
3463

    
3464
    # changes to the hypervisor list
3465
    if self.op.enabled_hypervisors is not None:
3466
      self.hv_list = self.op.enabled_hypervisors
3467
      for hv in self.hv_list:
3468
        # if the hypervisor doesn't already exist in the cluster
3469
        # hvparams, we initialize it to empty, and then (in both
3470
        # cases) we make sure to fill the defaults, as we might not
3471
        # have a complete defaults list if the hypervisor wasn't
3472
        # enabled before
3473
        if hv not in new_hvp:
3474
          new_hvp[hv] = {}
3475
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3476
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3477
    else:
3478
      self.hv_list = cluster.enabled_hypervisors
3479

    
3480
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3481
      # either the enabled list has changed, or the parameters have, validate
3482
      for hv_name, hv_params in self.new_hvparams.items():
3483
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3484
            (self.op.enabled_hypervisors and
3485
             hv_name in self.op.enabled_hypervisors)):
3486
          # either this is a new hypervisor, or its parameters have changed
3487
          hv_class = hypervisor.GetHypervisor(hv_name)
3488
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3489
          hv_class.CheckParameterSyntax(hv_params)
3490
          _CheckHVParams(self, node_list, hv_name, hv_params)
3491

    
3492
    if self.op.os_hvp:
3493
      # no need to check any newly-enabled hypervisors, since the
3494
      # defaults have already been checked in the above code-block
3495
      for os_name, os_hvp in self.new_os_hvp.items():
3496
        for hv_name, hv_params in os_hvp.items():
3497
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3498
          # we need to fill in the new os_hvp on top of the actual hv_p
3499
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3500
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3501
          hv_class = hypervisor.GetHypervisor(hv_name)
3502
          hv_class.CheckParameterSyntax(new_osp)
3503
          _CheckHVParams(self, node_list, hv_name, new_osp)
3504

    
3505
    if self.op.default_iallocator:
3506
      alloc_script = utils.FindFile(self.op.default_iallocator,
3507
                                    constants.IALLOCATOR_SEARCH_PATH,
3508
                                    os.path.isfile)
3509
      if alloc_script is None:
3510
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3511
                                   " specified" % self.op.default_iallocator,
3512
                                   errors.ECODE_INVAL)
3513

    
3514
  def Exec(self, feedback_fn):
3515
    """Change the parameters of the cluster.
3516

3517
    """
3518
    if self.op.vg_name is not None:
3519
      new_volume = self.op.vg_name
3520
      if not new_volume:
3521
        new_volume = None
3522
      if new_volume != self.cfg.GetVGName():
3523
        self.cfg.SetVGName(new_volume)
3524
      else:
3525
        feedback_fn("Cluster LVM configuration already in desired"
3526
                    " state, not changing")
3527
    if self.op.drbd_helper is not None:
3528
      new_helper = self.op.drbd_helper
3529
      if not new_helper:
3530
        new_helper = None
3531
      if new_helper != self.cfg.GetDRBDHelper():
3532
        self.cfg.SetDRBDHelper(new_helper)
3533
      else:
3534
        feedback_fn("Cluster DRBD helper already in desired state,"
3535
                    " not changing")
3536
    if self.op.hvparams:
3537
      self.cluster.hvparams = self.new_hvparams
3538
    if self.op.os_hvp:
3539
      self.cluster.os_hvp = self.new_os_hvp
3540
    if self.op.enabled_hypervisors is not None:
3541
      self.cluster.hvparams = self.new_hvparams
3542
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3543
    if self.op.beparams:
3544
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3545
    if self.op.nicparams:
3546
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3547
    if self.op.osparams:
3548
      self.cluster.osparams = self.new_osp
3549
    if self.op.ndparams:
3550
      self.cluster.ndparams = self.new_ndparams
3551

    
3552
    if self.op.candidate_pool_size is not None:
3553
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3554
      # we need to update the pool size here, otherwise the save will fail
3555
      _AdjustCandidatePool(self, [])
3556

    
3557
    if self.op.maintain_node_health is not None:
3558
      self.cluster.maintain_node_health = self.op.maintain_node_health
3559

    
3560
    if self.op.prealloc_wipe_disks is not None:
3561
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3562

    
3563
    if self.op.add_uids is not None:
3564
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3565

    
3566
    if self.op.remove_uids is not None:
3567
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3568

    
3569
    if self.op.uid_pool is not None:
3570
      self.cluster.uid_pool = self.op.uid_pool
3571

    
3572
    if self.op.default_iallocator is not None:
3573
      self.cluster.default_iallocator = self.op.default_iallocator
3574

    
3575
    if self.op.reserved_lvs is not None:
3576
      self.cluster.reserved_lvs = self.op.reserved_lvs
3577

    
3578
    def helper_os(aname, mods, desc):
3579
      desc += " OS list"
3580
      lst = getattr(self.cluster, aname)
3581
      for key, val in mods:
3582
        if key == constants.DDM_ADD:
3583
          if val in lst:
3584
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3585
          else:
3586
            lst.append(val)
3587
        elif key == constants.DDM_REMOVE:
3588
          if val in lst:
3589
            lst.remove(val)
3590
          else:
3591
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3592
        else:
3593
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3594

    
3595
    if self.op.hidden_os:
3596
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3597

    
3598
    if self.op.blacklisted_os:
3599
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3600

    
3601
    if self.op.master_netdev:
3602
      master = self.cfg.GetMasterNode()
3603
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3604
                  self.cluster.master_netdev)
3605
      result = self.rpc.call_node_stop_master(master, False)
3606
      result.Raise("Could not disable the master ip")
3607
      feedback_fn("Changing master_netdev from %s to %s" %
3608
                  (self.cluster.master_netdev, self.op.master_netdev))
3609
      self.cluster.master_netdev = self.op.master_netdev
3610

    
3611
    self.cfg.Update(self.cluster, feedback_fn)
3612

    
3613
    if self.op.master_netdev:
3614
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3615
                  self.op.master_netdev)
3616
      result = self.rpc.call_node_start_master(master, False, False)
3617
      if result.fail_msg:
3618
        self.LogWarning("Could not re-enable the master ip on"
3619
                        " the master, please restart manually: %s",
3620
                        result.fail_msg)
3621

    
3622

    
3623
def _UploadHelper(lu, nodes, fname):
3624
  """Helper for uploading a file and showing warnings.
3625

3626
  """
3627
  if os.path.exists(fname):
3628
    result = lu.rpc.call_upload_file(nodes, fname)
3629
    for to_node, to_result in result.items():
3630
      msg = to_result.fail_msg
3631
      if msg:
3632
        msg = ("Copy of file %s to node %s failed: %s" %
3633
               (fname, to_node, msg))
3634
        lu.proc.LogWarning(msg)
3635

    
3636

    
3637
def _ComputeAncillaryFiles(cluster, redist):
3638
  """Compute files external to Ganeti which need to be consistent.
3639

3640
  @type redist: boolean
3641
  @param redist: Whether to include files which need to be redistributed
3642
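  @rtype: tuple
  @return: a tuple of (files for all nodes, files which must exist on all
      nodes or on none, files only for master candidates, files only for
      VM-capable nodes)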

3643
  """
3644
  # Compute files for all nodes
3645
  files_all = set([
3646
    constants.SSH_KNOWN_HOSTS_FILE,
3647
    constants.CONFD_HMAC_KEY,
3648
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3649
    ])
3650

    
3651
  if not redist:
3652
    files_all.update(constants.ALL_CERT_FILES)
3653
    files_all.update(ssconf.SimpleStore().GetFileList())
3654

    
3655
  if cluster.modify_etc_hosts:
3656
    files_all.add(constants.ETC_HOSTS)
3657

    
3658
  # Files which must either exist on all nodes or on none
3659
  files_all_opt = set([
3660
    constants.RAPI_USERS_FILE,
3661
    ])
3662

    
3663
  # Files which should only be on master candidates
3664
  files_mc = set()
3665
  if not redist:
3666
    files_mc.add(constants.CLUSTER_CONF_FILE)
3667

    
3668
  # Files which should only be on VM-capable nodes
3669
  files_vm = set(filename
3670
    for hv_name in cluster.enabled_hypervisors
3671
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3672

    
3673
  # Filenames must be unique
3674
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3675
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3676
         "Found file listed in more than one file list"
3677

    
3678
  return (files_all, files_all_opt, files_mc, files_vm)
3679

    
3680

    
3681
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3682
  """Distribute additional files which are part of the cluster configuration.
3683

3684
  ConfigWriter takes care of distributing the config and ssconf files, but
3685
  there are more files which should be distributed to all nodes. This function
3686
  makes sure those are copied.
3687

3688
  @param lu: calling logical unit
3689
  @param additional_nodes: list of nodes not in the config to distribute to
3690
  @type additional_vm: boolean
3691
  @param additional_vm: whether the additional nodes are vm-capable or not
3692

3693
  """
3694
  # Gather target nodes
3695
  cluster = lu.cfg.GetClusterInfo()
3696
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3697

    
3698
  online_nodes = lu.cfg.GetOnlineNodeList()
3699
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3700

    
3701
  if additional_nodes is not None:
3702
    online_nodes.extend(additional_nodes)
3703
    if additional_vm:
3704
      vm_nodes.extend(additional_nodes)
3705

    
3706
  # Never distribute to master node
3707
  for nodelist in [online_nodes, vm_nodes]:
3708
    if master_info.name in nodelist:
3709
      nodelist.remove(master_info.name)
3710

    
3711
  # Gather file lists
3712
  (files_all, files_all_opt, files_mc, files_vm) = \
3713
    _ComputeAncillaryFiles(cluster, True)
3714

    
3715
  # Never re-distribute configuration file from here
3716
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3717
              constants.CLUSTER_CONF_FILE in files_vm)
3718
  assert not files_mc, "Master candidates not handled in this function"
3719

    
3720
  filemap = [
3721
    (online_nodes, files_all),
3722
    (online_nodes, files_all_opt),
3723
    (vm_nodes, files_vm),
3724
    ]
3725

    
3726
  # Upload the files
3727
  for (node_list, files) in filemap:
3728
    for fname in files:
3729
      _UploadHelper(lu, node_list, fname)
3730

    
3731

    
3732
class LUClusterRedistConf(NoHooksLU):
3733
  """Force the redistribution of cluster configuration.
3734

3735
  This is a very simple LU.
3736

3737
  """
3738
  REQ_BGL = False
3739

    
3740
  def ExpandNames(self):
3741
    self.needed_locks = {
3742
      locking.LEVEL_NODE: locking.ALL_SET,
3743
    }
3744
    self.share_locks[locking.LEVEL_NODE] = 1
3745

    
3746
  def Exec(self, feedback_fn):
3747
    """Redistribute the configuration.
3748

3749
    """
3750
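    # writing the cluster object makes ConfigWriter redistribute the config
    # and ssconf files; ancillary files are pushed explicitly below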
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3751
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
3755
  """Sleep and poll for an instance's disk to sync.
3756

3757
  """
3758
  if not instance.disks or disks is not None and not disks:
3759
    return True
3760

    
3761
  disks = _ExpandCheckDisks(instance, disks)
3762

    
3763
  if not oneshot:
3764
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3765

    
3766
  node = instance.primary_node
3767

    
3768
  for dev in disks:
3769
    lu.cfg.SetDiskID(dev, node)
3770

    
3771
  # TODO: Convert to utils.Retry
3772

    
3773
  retries = 0
3774
  degr_retries = 10 # number of retries; we sleep 1 second each time, so ~10s
3775
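  # Polling loop: ask the primary node for mirror status, report per-disk
  # progress, then sleep for at most 60 seconds (or the estimated remaining
  # sync time, if shorter) before the next round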
  while True:
3776
    max_time = 0
3777
    done = True
3778
    cumul_degraded = False
3779
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3780
    msg = rstats.fail_msg
3781
    if msg:
3782
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3783
      retries += 1
3784
      if retries >= 10:
3785
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3786
                                 " aborting." % node)
3787
      time.sleep(6)
3788
      continue
3789
    rstats = rstats.payload
3790
    retries = 0
3791
    for i, mstat in enumerate(rstats):
3792
      if mstat is None:
3793
        lu.LogWarning("Can't compute data for node %s/%s",
3794
                           node, disks[i].iv_name)
3795
        continue
3796

    
3797
      cumul_degraded = (cumul_degraded or
3798
                        (mstat.is_degraded and mstat.sync_percent is None))
3799
      if mstat.sync_percent is not None:
3800
        done = False
3801
        if mstat.estimated_time is not None:
3802
          rem_time = ("%s remaining (estimated)" %
3803
                      utils.FormatSeconds(mstat.estimated_time))
3804
          max_time = mstat.estimated_time
3805
        else:
3806
          rem_time = "no time estimate"
3807
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3808
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3809

    
3810
    # if we're done but degraded, let's do a few small retries, to
3811
    # make sure we see a stable and not transient situation; therefore
3812
    # we force restart of the loop
3813
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3814
      logging.info("Degraded disks found, %d retries left", degr_retries)
3815
      degr_retries -= 1
3816
      time.sleep(1)
3817
      continue
3818

    
3819
    if done or oneshot:
3820
      break
3821

    
3822
    time.sleep(min(60, max_time))
3823

    
3824
  if done:
3825
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3826
  return not cumul_degraded
3827

    
3828

    
3829
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3830
  """Check that mirrors are not degraded.
3831

3832
  The ldisk parameter, if True, will change the test from the
3833
  is_degraded attribute (which represents overall non-ok status for
3834
  the device(s)) to the ldisk (representing the local storage status).
3835

3836
  """
3837
  lu.cfg.SetDiskID(dev, node)
3838

    
3839
  result = True
3840

    
3841
  if on_primary or dev.AssembleOnSecondary():
3842
    rstats = lu.rpc.call_blockdev_find(node, dev)
3843
    msg = rstats.fail_msg
3844
    if msg:
3845
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3846
      result = False
3847
    elif not rstats.payload:
3848
      lu.LogWarning("Can't find disk on node %s", node)
3849
      result = False
3850
    else:
3851
      if ldisk:
3852
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3853
      else:
3854
        result = result and not rstats.payload.is_degraded
3855

    
3856
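  # a degraded child (e.g. one of the LVs backing a DRBD8 device) makes the
  # whole device inconsistent as well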
  if dev.children:
3857
    for child in dev.children:
3858
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3859

    
3860
  return result


class LUOobCommand(NoHooksLU):
3864
  """Logical unit for OOB handling.
3865

3866
  """
3867
  REQ_BGL = False
3868
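  # commands that are never run on the master node through this LU (powering
  # off or power-cycling the master would take the master daemon down with it)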
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3869

    
3870
  def ExpandNames(self):
3871
    """Gather locks we need.
3872

3873
    """
3874
    if self.op.node_names:
3875
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3876
      lock_names = self.op.node_names
3877
    else:
3878
      lock_names = locking.ALL_SET
3879

    
3880
    self.needed_locks = {
3881
      locking.LEVEL_NODE: lock_names,
3882
      }
3883

    
3884
  def CheckPrereq(self):
3885
    """Check prerequisites.
3886

3887
    This checks:
3888
     - the node exists in the configuration
3889
     - OOB is supported
3890

3891
    Any errors are signaled by raising errors.OpPrereqError.
3892

3893
    """
3894
    self.nodes = []
3895
    self.master_node = self.cfg.GetMasterNode()
3896

    
3897
    assert self.op.power_delay >= 0.0
3898

    
3899
    if self.op.node_names:
3900
      if (self.op.command in self._SKIP_MASTER and
3901
          self.master_node in self.op.node_names):
3902
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3903
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3904

    
3905
        if master_oob_handler:
3906
          additional_text = ("run '%s %s %s' if you want to operate on the"
3907
                             " master regardless") % (master_oob_handler,
3908
                                                      self.op.command,
3909
                                                      self.master_node)
3910
        else:
3911
          additional_text = "it does not support out-of-band operations"
3912

    
3913
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3914
                                    " allowed for %s; %s") %
3915
                                   (self.master_node, self.op.command,
3916
                                    additional_text), errors.ECODE_INVAL)
3917
    else:
3918
      self.op.node_names = self.cfg.GetNodeList()
3919
      if self.op.command in self._SKIP_MASTER:
3920
        self.op.node_names.remove(self.master_node)
3921

    
3922
    if self.op.command in self._SKIP_MASTER:
3923
      assert self.master_node not in self.op.node_names
3924

    
3925
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3926
      if node is None:
3927
        raise errors.OpPrereqError("Node %s not found" % node_name,
3928
                                   errors.ECODE_NOENT)
3929
      else:
3930
        self.nodes.append(node)
3931

    
3932
      if (not self.op.ignore_status and
3933
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3934
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3935
                                    " not marked offline") % node_name,
3936
                                   errors.ECODE_STATE)
3937

    
3938
  def Exec(self, feedback_fn):
3939
    """Execute OOB and return result if we expect any.
3940

3941
    """
3942
    master_node = self.master_node
3943
    ret = []
3944

    
3945
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3946
                                              key=lambda node: node.name)):
3947
      node_entry = [(constants.RS_NORMAL, node.name)]
3948
      ret.append(node_entry)
3949

    
3950
      oob_program = _SupportsOob(self.cfg, node)
3951

    
3952
      if not oob_program:
3953
        node_entry.append((constants.RS_UNAVAIL, None))
3954
        continue
3955

    
3956
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3957
                   self.op.command, oob_program, node.name)
3958
      result = self.rpc.call_run_oob(master_node, oob_program,
3959
                                     self.op.command, node.name,
3960
                                     self.op.timeout)
3961

    
3962
      if result.fail_msg:
3963
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3964
                        node.name, result.fail_msg)
3965
        node_entry.append((constants.RS_NODATA, None))
3966
      else:
3967
        try:
3968
          self._CheckPayload(result)
3969
        except errors.OpExecError, err:
3970
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3971
                          node.name, err)
3972
          node_entry.append((constants.RS_NODATA, None))
3973
        else:
3974
          if self.op.command == constants.OOB_HEALTH:
3975
            # For health we should log important events
3976
            for item, status in result.payload:
3977
              if status in [constants.OOB_STATUS_WARNING,
3978
                            constants.OOB_STATUS_CRITICAL]:
3979
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3980
                                item, node.name, status)
3981

    
3982
          if self.op.command == constants.OOB_POWER_ON:
3983
            node.powered = True
3984
          elif self.op.command == constants.OOB_POWER_OFF:
3985
            node.powered = False
3986
          elif self.op.command == constants.OOB_POWER_STATUS:
3987
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3988
            if powered != node.powered:
3989
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3990
                               " match actual power state (%s)"), node.powered,
3991
                              node.name, powered)
3992

    
3993
          # For configuration changing commands we should update the node
3994
          if self.op.command in (constants.OOB_POWER_ON,
3995
                                 constants.OOB_POWER_OFF):
3996
            self.cfg.Update(node, feedback_fn)
3997

    
3998
          node_entry.append((constants.RS_NORMAL, result.payload))
3999

    
4000
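          # stagger power-on commands: wait op.power_delay seconds between
          # nodes (but not after the last one)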
          if (self.op.command == constants.OOB_POWER_ON and
4001
              idx < len(self.nodes) - 1):
4002
            time.sleep(self.op.power_delay)
4003

    
4004
    return ret
4005

    
4006
  def _CheckPayload(self, result):
4007
    """Checks if the payload is valid.
4008

4009
    @param result: RPC result
4010
    @raises errors.OpExecError: If payload is not valid
4011

4012
    """
4013
    errs = []
4014
    if self.op.command == constants.OOB_HEALTH:
4015
      if not isinstance(result.payload, list):
4016
        errs.append("command 'health' is expected to return a list but got %s" %
4017
                    type(result.payload))
4018
      else:
4019
        for item, status in result.payload:
4020
          if status not in constants.OOB_STATUSES:
4021
            errs.append("health item '%s' has invalid status '%s'" %
4022
                        (item, status))
4023

    
4024
    if self.op.command == constants.OOB_POWER_STATUS:
4025
      if not isinstance(result.payload, dict):
4026
        errs.append("power-status is expected to return a dict but got %s" %
4027
                    type(result.payload))
4028

    
4029
    if self.op.command in [
4030
        constants.OOB_POWER_ON,
4031
        constants.OOB_POWER_OFF,
4032
        constants.OOB_POWER_CYCLE,
4033
        ]:
4034
      if result.payload is not None:
4035
        errs.append("%s is expected to not return payload but got '%s'" %
4036
                    (self.op.command, result.payload))
4037

    
4038
    if errs:
4039
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4040
                               utils.CommaJoin(errs))

class _OsQuery(_QueryBase):
4043
  FIELDS = query.OS_FIELDS
4044

    
4045
  def ExpandNames(self, lu):
4046
    # Lock all nodes in shared mode
4047
    # Temporary removal of locks, should be reverted later
4048
    # TODO: reintroduce locks when they are lighter-weight
4049
    lu.needed_locks = {}
4050
    #self.share_locks[locking.LEVEL_NODE] = 1
4051
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4052

    
4053
    # The following variables interact with _QueryBase._GetNames
4054
    if self.names:
4055
      self.wanted = self.names
4056
    else:
4057
      self.wanted = locking.ALL_SET
4058

    
4059
    self.do_locking = self.use_locking
4060

    
4061
  def DeclareLocks(self, lu, level):
4062
    pass
4063

    
4064
  @staticmethod
4065
  def _DiagnoseByOS(rlist):
4066
    """Remaps a per-node return list into an a per-os per-node dictionary
4067

4068
    @param rlist: a map with node names as keys and OS objects as values
4069

4070
    @rtype: dict
4071
    @return: a dictionary with osnames as keys and as value another
4072
        map, with nodes as keys and tuples of (path, status, diagnose,
4073
        variants, parameters, api_versions) as values, eg::
4074

4075
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4076
                                     (/srv/..., False, "invalid api")],
4077
                           "node2": [(/srv/..., True, "", [], [])]}
4078
          }
4079

4080
    """
4081
    all_os = {}
4082
    # we build here the list of nodes that didn't fail the RPC (at RPC
4083
    # level), so that nodes with a non-responding node daemon don't
4084
    # make all OSes invalid
4085
    good_nodes = [node_name for node_name in rlist
4086
                  if not rlist[node_name].fail_msg]
4087
    for node_name, nr in rlist.items():
4088
      if nr.fail_msg or not nr.payload:
4089
        continue
4090
      for (name, path, status, diagnose, variants,
4091
           params, api_versions) in nr.payload:
4092
        if name not in all_os:
4093
          # build a list of nodes for this os containing empty lists
4094
          # for each node in node_list
4095
          all_os[name] = {}
4096
          for nname in good_nodes:
4097
            all_os[name][nname] = []
4098
        # convert params from [name, help] to (name, help)
4099
        params = [tuple(v) for v in params]
4100
        all_os[name][node_name].append((path, status, diagnose,
4101
                                        variants, params, api_versions))
4102
    return all_os
4103

    
4104
  def _GetQueryData(self, lu):
4105
    """Computes the list of nodes and their attributes.
4106

4107
    """
4108
    # Locking is not used
4109
    assert not (compat.any(lu.glm.is_owned(level)
4110
                           for level in locking.LEVELS
4111
                           if level != locking.LEVEL_CLUSTER) or
4112
                self.do_locking or self.use_locking)
4113

    
4114
    valid_nodes = [node.name
4115
                   for node in lu.cfg.GetAllNodesInfo().values()
4116
                   if not node.offline and node.vm_capable]
4117
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4118
    cluster = lu.cfg.GetClusterInfo()
4119

    
4120
    data = {}
4121

    
4122
    for (os_name, os_data) in pol.items():
4123
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4124
                          hidden=(os_name in cluster.hidden_os),
4125
                          blacklisted=(os_name in cluster.blacklisted_os))
4126

    
4127
      variants = set()
4128
      parameters = set()
4129
      api_versions = set()
4130

    
4131
      for idx, osl in enumerate(os_data.values()):
4132
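        # an OS only stays valid while every node reports a good status for
        # its first listed copy of that OS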
        info.valid = bool(info.valid and osl and osl[0][1])
4133
        if not info.valid:
4134
          break
4135

    
4136
        (node_variants, node_params, node_api) = osl[0][3:6]
4137
        if idx == 0:
4138
          # First entry
4139
          variants.update(node_variants)
4140
          parameters.update(node_params)
4141
          api_versions.update(node_api)
4142
        else:
4143
          # Filter out inconsistent values
4144
          variants.intersection_update(node_variants)
4145
          parameters.intersection_update(node_params)
4146
          api_versions.intersection_update(node_api)
4147

    
4148
      info.variants = list(variants)
4149
      info.parameters = list(parameters)
4150
      info.api_versions = list(api_versions)
4151

    
4152
      data[os_name] = info
4153

    
4154
    # Prepare data in requested order
4155
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4156
            if name in data]
4157

    
4158

    
4159
class LUOsDiagnose(NoHooksLU):
4160
  """Logical unit for OS diagnose/query.
4161

4162
  """
4163
  REQ_BGL = False
4164

    
4165
  @staticmethod
4166
  def _BuildFilter(fields, names):
4167
    """Builds a filter for querying OSes.
4168

4169
    """
4170
    name_filter = qlang.MakeSimpleFilter("name", names)
4171

    
4172
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4173
    # respective field is not requested
4174
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4175
                     for fname in ["hidden", "blacklisted"]
4176
                     if fname not in fields]
4177
    if "valid" not in fields:
4178
      status_filter.append([qlang.OP_TRUE, "valid"])
4179

    
4180
    if status_filter:
4181
      status_filter.insert(0, qlang.OP_AND)
4182
    else:
4183
      status_filter = None
4184

    
4185
    if name_filter and status_filter:
4186
      return [qlang.OP_AND, name_filter, status_filter]
4187
    elif name_filter:
4188
      return name_filter
4189
    else:
4190
      return status_filter
4191

    
4192
  def CheckArguments(self):
4193
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4194
                       self.op.output_fields, False)
4195

    
4196
  def ExpandNames(self):
4197
    self.oq.ExpandNames(self)
4198

    
4199
  def Exec(self, feedback_fn):
4200
    return self.oq.OldStyleQuery(self)
4201

    
4202

    
4203
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


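# Illustrative sketch only (not called anywhere): how the node_to_primary and
# node_to_secondary reverse maps built in _NodeQuery._GetQueryData look for a
# toy configuration; the instance and node names are invented for the example.
def _ExampleNodeInstanceMaps():
  """Builds the reverse node->instances maps from plain dicts.

  """
  instances = [
    {"name": "inst1", "primary_node": "node1", "secondary_nodes": ["node2"]},
    {"name": "inst2", "primary_node": "node2", "secondary_nodes": []},
    ]
  nodenames = ["node1", "node2"]
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for inst in instances:
    node_to_primary[inst["primary_node"]].add(inst["name"])
    for secnode in inst["secondary_nodes"]:
      node_to_secondary[secnode].add(inst["name"])
  # node_to_primary == {"node1": set(["inst1"]), "node2": set(["inst2"])}
  # node_to_secondary == {"node1": set(), "node2": set(["inst1"])}
  return (node_to_primary, node_to_secondary)

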
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


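# Illustrative sketch only: the key convention used above when looking up the
# owner of a volume, i.e. (node name, "<vg>/<lv name>"); the names below are
# invented, and _MapInstanceDisksToNodes is expected to return a dict of the
# same shape.
def _ExampleVolumeOwnerLookup():
  """Shows the (node, "vg/name") lookup used by LUNodeQueryvols.Exec.

  """
  vol2inst = {
    ("node1.example.com", "xenvg/disk0"): "inst1.example.com",
    }
  vol = {"vg": "xenvg", "name": "disk0", "dev": "/dev/xenvg/disk0"}
  node = "node1.example.com"
  # Unknown volumes map to "-", exactly as in the LU above
  return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")

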
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


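# Illustrative sketch only: the field_idx/name_idx bookkeeping above maps a
# storage row (returned in the order of the "fields" list sent to the RPC)
# back into the order the user asked for; the field names and row values
# below are invented for the example.
def _ExampleStorageRowReordering():
  """Reorders one storage row into the user-requested field order.

  """
  fields = ["name", "size", "used"]     # order sent to the storage RPC
  output_fields = ["used", "name"]      # order requested by the user
  field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
  row = ["xenvg", 102400, 51200]        # payload row for one storage unit
  # Evaluates to [51200, "xenvg"]
  return [row[field_idx[field]] for field in output_fields]

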
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable-msg=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


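# Illustrative sketch only: the decision taken above for each instance
# reported live by a node, i.e. an instance counts as running only when the
# reporting node is its configured primary, otherwise it is flagged as being
# on the wrong node; the names below are invented for the example.
def _ExampleWrongnodeDetection():
  """Classifies RPC-reported instances like _InstanceQuery._GetQueryData.

  """
  primary_of = {"inst1": "node1", "inst2": "node2"}
  reported = {"node1": ["inst1", "inst2"]}     # node1 also reports inst2
  live = set()
  wrongnode = set()
  for node, insts in reported.items():
    for inst in insts:
      if primary_of.get(inst) == node:
        live.add(inst)
      else:
        wrongnode.add(inst)
  # Returns (set(["inst1"]), set(["inst2"]))
  return (live, wrongnode)

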
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the target node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


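# Illustrative sketch only: the set arithmetic used in CheckArguments above to
# reject changes to non-modifiable storage fields; the field names passed in
# are placeholders, the real ones come from
# constants.MODIFIABLE_STORAGE_FIELDS.
def _ExampleModifiableFieldsCheck(changes, modifiable):
  """Returns the offending field names; empty means the change is acceptable.

  """
  # e.g. _ExampleModifiableFieldsCheck({"allocatable": True}, ["allocatable"])
  # evaluates to set()
  return set(changes.keys()) - frozenset(modifiable)

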
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it is powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


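# Illustrative sketch only: the single-homed versus dual-homed compatibility
# rule enforced in LUNodeAdd.CheckPrereq, reduced to plain strings; the IP
# addresses used in the usage comment are invented for the example.
def _ExampleHomingCheck(master_pip, master_sip, new_pip, new_sip):
  """Returns None if the new node's homing matches the master, else an error.

  """
  master_singlehomed = master_sip == master_pip
  newbie_singlehomed = new_sip == new_pip
  if master_singlehomed == newbie_singlehomed:
    return None
  if master_singlehomed:
    return "The master has no secondary ip but the new node has one"
  return "The master has a secondary ip but the new node doesn't have one"

# _ExampleHomingCheck("192.0.2.1", "192.0.2.1", "192.0.2.2", "198.51.100.2")
# returns the "master has no secondary ip" message, mirroring the LU above.

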
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


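# Illustrative sketch only: how the _F2R table of LUNodeSetParams maps a
# node's (master_candidate, drained, offline) flag tuple onto one of the four
# roles; the flag combinations in the usage comment are arbitrary examples.
def _ExampleNodeRole(master_candidate, drained, offline):
  """Looks up the role for a flag combination via LUNodeSetParams._F2R.

  """
  # pylint: disable-msg=W0212
  return LUNodeSetParams._F2R[(master_candidate, drained, offline)]

# e.g. _ExampleNodeRole(True, False, False) == LUNodeSetParams._ROLE_CANDIDATE
# and _ExampleNodeRole(False, False, False) == LUNodeSetParams._ROLE_REGULAR

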
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


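# Illustrative sketch only: the os_hvp filtering done in LUClusterQuery.Exec,
# applied to plain dicts; the hypervisor and OS names below are invented for
# the example.
def _ExampleFilterOsHvp(os_hvp, enabled_hypervisors):
  """Keeps only the per-OS hypervisor overrides of enabled hypervisors.

  """
  return dict((os_name,
               dict((hv_name, hv_params)
                    for (hv_name, hv_params) in hv_dict.items()
                    if hv_name in enabled_hypervisors))
              for (os_name, hv_dict) in os_hvp.items())

# _ExampleFilterOsHvp({"lenny": {"xen-pvm": {}, "kvm": {}}}, ["kvm"])
# evaluates to {"lenny": {"kvm": {}}}

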
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5504
                           ignore_size=False):
5505
  """Prepare the block devices for an instance.
5506

5507
  This sets up the block devices on all nodes.
5508

5509
  @type lu: L{LogicalUnit}
5510
  @param lu: the logical unit on whose behalf we execute
5511
  @type instance: L{objects.Instance}
5512
  @param instance: the instance for whose disks we assemble
5513
  @type disks: list of L{objects.Disk} or None
5514
  @param disks: which disks to assemble (or all, if None)
5515
  @type ignore_secondaries: boolean
5516
  @param ignore_secondaries: if true, errors on secondary nodes
5517
      won't result in an error return from the function
5518
  @type ignore_size: boolean
5519
  @param ignore_size: if true, the current known size of the disk
5520
      will not be used during the disk activation, useful for cases
5521
      when the size is wrong
5522
  @return: False if the operation failed, otherwise a list of
5523
      (host, instance_visible_name, node_visible_name)
5524
      with the mapping from node devices to instance devices
5525

5526
  """
5527
  device_info = []
5528
  disks_ok = True
5529
  iname = instance.name
5530
  disks = _ExpandCheckDisks(instance, disks)
5531

    
5532
  # With the two passes mechanism we try to reduce the window of
5533
  # opportunity for the race condition of switching DRBD to primary
5534
  # before handshaking occured, but we do not eliminate it
5535

    
5536
  # The proper fix would be to wait (with some limits) until the
5537
  # connection has been made and drbd transitions from WFConnection
5538
  # into any other network-connected state (Connected, SyncTarget,
5539
  # SyncSource, etc.)
5540

    
5541
  # 1st pass, assemble on all nodes in secondary mode
5542
  for idx, inst_disk in enumerate(disks):
5543
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5544
      if ignore_size:
5545
        node_disk = node_disk.Copy()
5546
        node_disk.UnsetSize()
5547
      lu.cfg.SetDiskID(node_disk, node)
5548
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5549
      msg = result.fail_msg
5550
      if msg:
5551
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5552
                           " (is_primary=False, pass=1): %s",
5553
                           inst_disk.iv_name, node, msg)
5554
        if not ignore_secondaries:
5555
          disks_ok = False
5556

    
5557
  # FIXME: race condition on drbd migration to primary
5558

    
5559
  # 2nd pass, do only the primary node
5560
  for idx, inst_disk in enumerate(disks):
5561
    dev_path = None
5562

    
5563
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5564
      if node != instance.primary_node:
5565
        continue
5566
      if ignore_size:
5567
        node_disk = node_disk.Copy()
5568
        node_disk.UnsetSize()
5569
      lu.cfg.SetDiskID(node_disk, node)
5570
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5571
      msg = result.fail_msg
5572
      if msg:
5573
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5574
                           " (is_primary=True, pass=2): %s",
5575
                           inst_disk.iv_name, node, msg)
5576
        disks_ok = False
5577
      else:
5578
        dev_path = result.payload
5579

    
5580
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5581

    
5582
  # leave the disks configured for the primary node
5583
  # this is a workaround that would be fixed better by
5584
  # improving the logical/physical id handling
5585
  for disk in disks:
5586
    lu.cfg.SetDiskID(disk, instance.primary_node)
5587

    
5588
  return disks_ok, device_info
5589

    
5590

    
5591
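# Illustrative sketch (hypothetical helper, not part of the original module):
# how a caller typically consumes the (status, device info) pair returned by
# _AssembleInstanceDisks; "lu" and "instance" are assumed to be a LogicalUnit
# and an objects.Instance, as elsewhere in this file.
def _ExampleAssembleAndReport(lu, instance):
  """Illustrative only: assemble all disks and log the device mapping."""
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    lu.LogInfo("Disk %s of %s is visible on node %s as %s",
               iv_name, instance.name, node, dev_path)
  return device_info

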
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


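# Illustrative sketch (hypothetical helper): _StartInstanceDisks is the usual
# entry point when an LU needs the disks up before acting on the instance;
# passing force=None suppresses the "--force" hint while still treating
# secondary-node errors as fatal.
def _ExampleStartDisksQuietly(lu, instance):
  """Illustrative only: bring the disks up without suggesting --force."""
  _StartInstanceDisks(lu, instance, None)

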
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


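# Illustrative sketch (hypothetical helper): _SafeShutdownInstanceDisks only
# acts on stopped instances, and accepts a subset of the instance's disks.
def _ExampleShutdownFirstDisk(lu, instance):
  """Illustrative only: deactivate just the first disk."""
  _SafeShutdownInstanceDisks(lu, instance, disks=instance.disks[:1])

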
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are ignored only if ignore_primary is
  true; otherwise they cause a failure result.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


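# Illustrative sketch (hypothetical helper): ignore_primary=True is meant for
# situations where the primary node is known to be dead or unreachable, so a
# failed shutdown there should not abort the whole operation.
def _ExampleBestEffortShutdownDisks(lu, instance):
  """Illustrative only: best-effort disk shutdown around a dead primary."""
  if not _ShutdownInstanceDisks(lu, instance, ignore_primary=True):
    lu.LogWarning("Some block devices of %s could not be shut down",
                  instance.name)

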
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


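# Illustrative sketch (hypothetical helper): the usual pattern is to fill the
# instance's backend parameters from the cluster defaults and then check the
# resulting memory requirement, as LUInstanceStartup does further below.
def _ExampleCheckStartupMemory(lu, instance):
  """Illustrative only: verify the primary node can host the instance."""
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)

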
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


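# Illustrative sketch (hypothetical helper, with made-up volume group names):
# req_sizes maps each volume group to the total space in MiB that the new
# disks would consume in it, so per-VG requirements are checked separately.
def _ExampleCheckDiskSpace(lu, nodenames):
  """Illustrative only: require 10 GiB in 'xenvg' and 2 GiB in 'ssdvg'."""
  req_sizes = {
    "xenvg": 10 * 1024,
    "ssdvg": 2 * 1024,
    }
  _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes)

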
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


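# Illustrative sketch (hypothetical client-side snippet): the opcode driving
# LUInstanceStartup follows the usual OpInstance* naming; only fields that
# this LU actually reads (force, hvparams, beparams, ignore_offline_nodes,
# no_remember, startup_paused) are assumed here.
def _ExampleStartupOpcode(instance_name):
  """Illustrative only: build a plain instance startup opcode."""
  return opcodes.OpInstanceStartup(instance_name=instance_name,
                                   force=False,
                                   ignore_offline_nodes=False)

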
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


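# Illustrative sketch (hypothetical client-side snippet): a soft or hard
# reboot is performed in place only while the instance is running; any other
# case goes through a full shutdown/start cycle as implemented above.  The
# opcode name and fields are assumed from the LU's self.op usage.
def _ExampleRebootOpcode(instance_name):
  """Illustrative only: request a hard reboot with the default timeout."""
  return opcodes.OpInstanceReboot(
    instance_name=instance_name,
    reboot_type=constants.INSTANCE_REBOOT_HARD,
    ignore_secondaries=False,
    shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT)

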
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


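# Illustrative sketch (hypothetical client-side snippet): with no_remember
# set, the instance is stopped but the cluster configuration still records it
# as up, so the shutdown is treated as temporary.  The opcode name and fields
# are assumed from the LU's self.op usage.
def _ExampleTemporaryShutdownOpcode(instance_name):
  """Illustrative only: stop an instance without recording it as down."""
  return opcodes.OpInstanceShutdown(instance_name=instance_name,
                                    no_remember=True)

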
class LUInstanceReinstall(LogicalUnit):
6069
  """Reinstall an instance.
6070

6071
  """
6072
  HPATH = "instance-reinstall"
6073
  HTYPE = constants.HTYPE_INSTANCE
6074
  REQ_BGL = False
6075

    
6076
  def ExpandNames(self):
6077
    self._ExpandAndLockInstance()
6078

    
6079
  def BuildHooksEnv(self):
6080
    """Build hooks env.
6081

6082
    This runs on master, primary and secondary nodes of the instance.
6083

6084
    """
6085
    return _BuildInstanceHookEnvByObject(self, self.instance)
6086

    
6087
  def BuildHooksNodes(self):
6088
    """Build hooks nodes.
6089

6090
    """
6091
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6092
    return (nl, nl)
6093

    
6094
  def CheckPrereq(self):
6095
    """Check prerequisites.
6096

6097
    This checks that the instance is in the cluster and is not running.
6098

6099
    """
6100
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6101
    assert instance is not None, \
6102
      "Cannot retrieve locked instance %s" % self.op.instance_name
6103
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6104
                     " offline, cannot reinstall")
6105
    for node in instance.secondary_nodes:
6106
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6107
                       " cannot reinstall")
6108

    
6109
    if instance.disk_template == constants.DT_DISKLESS:
6110
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6111
                                 self.op.instance_name,
6112
                                 errors.ECODE_INVAL)
6113
    _CheckInstanceDown(self, instance, "cannot reinstall")
6114

    
6115
    if self.op.os_type is not None:
6116
      # OS verification
6117
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6118
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6119
      instance_os = self.op.os_type
6120
    else:
6121
      instance_os = instance.os
6122

    
6123
    nodelist = list(instance.all_nodes)
6124

    
6125
    if self.op.osparams:
6126
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6127
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6128
      self.os_inst = i_osdict # the new dict (without defaults)
6129
    else:
6130
      self.os_inst = None
6131

    
6132
    self.instance = instance
6133

    
6134
  def Exec(self, feedback_fn):
6135
    """Reinstall the instance.
6136

6137
    """
6138
    inst = self.instance
6139

    
6140
    if self.op.os_type is not None:
6141
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6142
      inst.os = self.op.os_type
6143
      # Write to configuration
6144
      self.cfg.Update(inst, feedback_fn)
6145

    
6146
    _StartInstanceDisks(self, inst, None)
6147
    try:
6148
      feedback_fn("Running the instance OS create scripts...")
6149
      # FIXME: pass debug option from opcode to backend
6150
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6151
                                             self.op.debug_level,
6152
                                             osparams=self.os_inst)
6153
      result.Raise("Could not install OS for instance %s on node %s" %
6154
                   (inst.name, inst.primary_node))
6155
    finally:
6156
      _ShutdownInstanceDisks(self, inst)
6157

    
6158

    
6159
class LUInstanceRecreateDisks(LogicalUnit):
6160
  """Recreate an instance's missing disks.
6161

6162
  """
6163
  HPATH = "instance-recreate-disks"
6164
  HTYPE = constants.HTYPE_INSTANCE
6165
  REQ_BGL = False
6166

    
6167
  def CheckArguments(self):
6168
    # normalise the disk list
6169
    self.op.disks = sorted(frozenset(self.op.disks))
6170

    
6171
  def ExpandNames(self):
6172
    self._ExpandAndLockInstance()
6173
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6174
    if self.op.nodes:
6175
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6176
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6177
    else:
6178
      self.needed_locks[locking.LEVEL_NODE] = []
6179

    
6180
  def DeclareLocks(self, level):
6181
    if level == locking.LEVEL_NODE:
6182
      # if we replace the nodes, we only need to lock the old primary,
6183
      # otherwise we need to lock all nodes for disk re-creation
6184
      primary_only = bool(self.op.nodes)
6185
      self._LockInstancesNodes(primary_only=primary_only)
6186

    
6187
  def BuildHooksEnv(self):
6188
    """Build hooks env.
6189

6190
    This runs on master, primary and secondary nodes of the instance.
6191

6192
    """
6193
    return _BuildInstanceHookEnvByObject(self, self.instance)
6194

    
6195
  def BuildHooksNodes(self):
6196
    """Build hooks nodes.
6197

6198
    """
6199
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6200
    return (nl, nl)
6201

    
6202
  def CheckPrereq(self):
6203
    """Check prerequisites.
6204

6205
    This checks that the instance is in the cluster and is not running.
6206

6207
    """
6208
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6209
    assert instance is not None, \
6210
      "Cannot retrieve locked instance %s" % self.op.instance_name
6211
    if self.op.nodes:
6212
      if len(self.op.nodes) != len(instance.all_nodes):
6213
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6214
                                   " %d replacement nodes were specified" %
6215
                                   (instance.name, len(instance.all_nodes),
6216
                                    len(self.op.nodes)),
6217
                                   errors.ECODE_INVAL)
6218
      assert instance.disk_template != constants.DT_DRBD8 or \
6219
          len(self.op.nodes) == 2
6220
      assert instance.disk_template != constants.DT_PLAIN or \
6221
          len(self.op.nodes) == 1
6222
      primary_node = self.op.nodes[0]
6223
    else:
6224
      primary_node = instance.primary_node
6225
    _CheckNodeOnline(self, primary_node)
6226

    
6227
    if instance.disk_template == constants.DT_DISKLESS:
6228
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6229
                                 self.op.instance_name, errors.ECODE_INVAL)
6230
    # if we replace nodes *and* the old primary is offline, we don't
6231
    # check
6232
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6233
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6234
    if not (self.op.nodes and old_pnode.offline):
6235
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6236

    
6237
    if not self.op.disks:
6238
      self.op.disks = range(len(instance.disks))
6239
    else:
6240
      for idx in self.op.disks:
6241
        if idx >= len(instance.disks):
6242
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6243
                                     errors.ECODE_INVAL)
6244
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6245
      raise errors.OpPrereqError("Can't recreate disks partially and"
6246
                                 " change the nodes at the same time",
6247
                                 errors.ECODE_INVAL)
6248
    self.instance = instance
6249

    
6250
  def Exec(self, feedback_fn):
6251
    """Recreate the disks.
6252

6253
    """
6254
    instance = self.instance
6255

    
6256
    to_skip = []
6257
    mods = [] # keeps track of needed logical_id changes
6258

    
6259
    for idx, disk in enumerate(instance.disks):
6260
      if idx not in self.op.disks: # disk idx has not been passed in
6261
        to_skip.append(idx)
6262
        continue
6263
      # update secondaries for disks, if needed
6264
      if self.op.nodes:
6265
        if disk.dev_type == constants.LD_DRBD8:
6266
          # need to update the nodes and minors
6267
          assert len(self.op.nodes) == 2
6268
          assert len(disk.logical_id) == 6 # otherwise disk internals
6269
                                           # have changed
6270
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6271
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6272
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6273
                    new_minors[0], new_minors[1], old_secret)
6274
          assert len(disk.logical_id) == len(new_id)
6275
          mods.append((idx, new_id))
6276

    
6277
    # now that we have passed all asserts above, we can apply the mods
6278
    # in a single run (to avoid partial changes)
6279
    for idx, new_id in mods:
6280
      instance.disks[idx].logical_id = new_id
6281

    
6282
    # change primary node, if needed
6283
    if self.op.nodes:
6284
      instance.primary_node = self.op.nodes[0]
6285
      self.LogWarning("Changing the instance's nodes, you will have to"
6286
                      " remove any disks left on the older nodes manually")
6287

    
6288
    if self.op.nodes:
6289
      self.cfg.Update(instance, feedback_fn)
6290

    
6291
    _CreateDisks(self, instance, to_skip=to_skip)
6292

    
6293

    
6294
class LUInstanceRename(LogicalUnit):
6295
  """Rename an instance.
6296

6297
  """
6298
  HPATH = "instance-rename"
6299
  HTYPE = constants.HTYPE_INSTANCE
6300

    
6301
  def CheckArguments(self):
6302
    """Check arguments.
6303

6304
    """
6305
    if self.op.ip_check and not self.op.name_check:
6306
      # TODO: make the ip check more flexible and not depend on the name check
6307
      raise errors.OpPrereqError("IP address check requires a name check",
6308
                                 errors.ECODE_INVAL)
6309

    
6310
  def BuildHooksEnv(self):
6311
    """Build hooks env.
6312

6313
    This runs on master, primary and secondary nodes of the instance.
6314

6315
    """
6316
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6317
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6318
    return env
6319

    
6320
  def BuildHooksNodes(self):
6321
    """Build hooks nodes.
6322

6323
    """
6324
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6325
    return (nl, nl)
6326

    
6327
  def CheckPrereq(self):
6328
    """Check prerequisites.
6329

6330
    This checks that the instance is in the cluster and is not running.
6331

6332
    """
6333
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6334
                                                self.op.instance_name)
6335
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6336
    assert instance is not None
6337
    _CheckNodeOnline(self, instance.primary_node)
6338
    _CheckInstanceDown(self, instance, "cannot rename")
6339
    self.instance = instance
6340

    
6341
    new_name = self.op.new_name
6342
    if self.op.name_check:
6343
      hostname = netutils.GetHostname(name=new_name)
6344
      if hostname != new_name:
6345
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6346
                     hostname.name)
6347
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6348
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6349
                                    " same as given hostname '%s'") %
6350
                                    (hostname.name, self.op.new_name),
6351
                                    errors.ECODE_INVAL)
6352
      new_name = self.op.new_name = hostname.name
6353
      if (self.op.ip_check and
6354
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6355
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6356
                                   (hostname.ip, new_name),
6357
                                   errors.ECODE_NOTUNIQUE)
6358

    
6359
    instance_list = self.cfg.GetInstanceList()
6360
    if new_name in instance_list and new_name != instance.name:
6361
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6362
                                 new_name, errors.ECODE_EXISTS)
6363

    
6364
  def Exec(self, feedback_fn):
6365
    """Rename the instance.
6366

6367
    """
6368
    inst = self.instance
6369
    old_name = inst.name
6370

    
6371
    rename_file_storage = False
6372
    if (inst.disk_template in constants.DTS_FILEBASED and
6373
        self.op.new_name != inst.name):
6374
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6375
      rename_file_storage = True
6376

    
6377
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6378
    # Change the instance lock. This is definitely safe while we hold the BGL.
6379
    # Otherwise the new lock would have to be added in acquired mode.
6380
    assert self.REQ_BGL
6381
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6382
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6383

    
6384
    # re-read the instance from the configuration after rename
6385
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6386

    
6387
    if rename_file_storage:
6388
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6389
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6390
                                                     old_file_storage_dir,
6391
                                                     new_file_storage_dir)
6392
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6393
                   " (but the instance has been renamed in Ganeti)" %
6394
                   (inst.primary_node, old_file_storage_dir,
6395
                    new_file_storage_dir))
6396

    
6397
    _StartInstanceDisks(self, inst, None)
6398
    try:
6399
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6400
                                                 old_name, self.op.debug_level)
6401
      msg = result.fail_msg
6402
      if msg:
6403
        msg = ("Could not run OS rename script for instance %s on node %s"
6404
               " (but the instance has been renamed in Ganeti): %s" %
6405
               (inst.name, inst.primary_node, msg))
6406
        self.proc.LogWarning(msg)
6407
    finally:
6408
      _ShutdownInstanceDisks(self, inst)
6409

    
6410
    return inst.name
6411

    
6412

    
6413
class LUInstanceRemove(LogicalUnit):
6414
  """Remove an instance.
6415

6416
  """
6417
  HPATH = "instance-remove"
6418
  HTYPE = constants.HTYPE_INSTANCE
6419
  REQ_BGL = False
6420

    
6421
  def ExpandNames(self):
6422
    self._ExpandAndLockInstance()
6423
    self.needed_locks[locking.LEVEL_NODE] = []
6424
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6425

    
6426
  def DeclareLocks(self, level):
6427
    if level == locking.LEVEL_NODE:
6428
      self._LockInstancesNodes()
6429

    
6430
  def BuildHooksEnv(self):
6431
    """Build hooks env.
6432

6433
    This runs on master, primary and secondary nodes of the instance.
6434

6435
    """
6436
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6437
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6438
    return env
6439

    
6440
  def BuildHooksNodes(self):
6441
    """Build hooks nodes.
6442

6443
    """
6444
    nl = [self.cfg.GetMasterNode()]
6445
    nl_post = list(self.instance.all_nodes) + nl
6446
    return (nl, nl_post)
6447

    
6448
  def CheckPrereq(self):
6449
    """Check prerequisites.
6450

6451
    This checks that the instance is in the cluster.
6452

6453
    """
6454
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6455
    assert self.instance is not None, \
6456
      "Cannot retrieve locked instance %s" % self.op.instance_name
6457

    
6458
  def Exec(self, feedback_fn):
6459
    """Remove the instance.
6460

6461
    """
6462
    instance = self.instance
6463
    logging.info("Shutting down instance %s on node %s",
6464
                 instance.name, instance.primary_node)
6465

    
6466
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6467
                                             self.op.shutdown_timeout)
6468
    msg = result.fail_msg
6469
    if msg:
6470
      if self.op.ignore_failures:
6471
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6472
      else:
6473
        raise errors.OpExecError("Could not shutdown instance %s on"
6474
                                 " node %s: %s" %
6475
                                 (instance.name, instance.primary_node, msg))
6476

    
6477
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6478

    
6479

    
6480
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6481
  """Utility function to remove an instance.
6482

6483
  """
6484
  logging.info("Removing block devices for instance %s", instance.name)
6485

    
6486
  if not _RemoveDisks(lu, instance):
6487
    if not ignore_failures:
6488
      raise errors.OpExecError("Can't remove instance's disks")
6489
    feedback_fn("Warning: can't remove instance's disks")
6490

    
6491
  logging.info("Removing instance %s out of cluster config", instance.name)
6492

    
6493
  lu.cfg.RemoveInstance(instance.name)
6494

    
6495
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6496
    "Instance lock removal conflict"
6497

    
6498
  # Remove lock for the instance
6499
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6500

    
6501

    
6502
class LUInstanceQuery(NoHooksLU):
6503
  """Logical unit for querying instances.
6504

6505
  """
6506
  # pylint: disable-msg=W0142
6507
  REQ_BGL = False
6508

    
6509
  def CheckArguments(self):
6510
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6511
                             self.op.output_fields, self.op.use_locking)
6512

    
6513
  def ExpandNames(self):
6514
    self.iq.ExpandNames(self)
6515

    
6516
  def DeclareLocks(self, level):
6517
    self.iq.DeclareLocks(self, level)
6518

    
6519
  def Exec(self, feedback_fn):
6520
    return self.iq.OldStyleQuery(self)
6521

    
6522

    
6523
class LUInstanceFailover(LogicalUnit):
6524
  """Failover an instance.
6525

6526
  """
6527
  HPATH = "instance-failover"
6528
  HTYPE = constants.HTYPE_INSTANCE
6529
  REQ_BGL = False
6530

    
6531
  def CheckArguments(self):
6532
    """Check the arguments.
6533

6534
    """
6535
    self.iallocator = getattr(self.op, "iallocator", None)
6536
    self.target_node = getattr(self.op, "target_node", None)
6537

    
6538
  def ExpandNames(self):
6539
    self._ExpandAndLockInstance()
6540

    
6541
    if self.op.target_node is not None:
6542
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6543

    
6544
    self.needed_locks[locking.LEVEL_NODE] = []
6545
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6546

    
6547
    ignore_consistency = self.op.ignore_consistency
6548
    shutdown_timeout = self.op.shutdown_timeout
6549
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6550
                                       cleanup=False,
6551
                                       failover=True,
6552
                                       ignore_consistency=ignore_consistency,
6553
                                       shutdown_timeout=shutdown_timeout)
6554
    self.tasklets = [self._migrater]
6555

    
6556
  def DeclareLocks(self, level):
6557
    if level == locking.LEVEL_NODE:
6558
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6559
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6560
        if self.op.target_node is None:
6561
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6562
        else:
6563
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6564
                                                   self.op.target_node]
6565
        del self.recalculate_locks[locking.LEVEL_NODE]
6566
      else:
6567
        self._LockInstancesNodes()
6568

    
6569
  def BuildHooksEnv(self):
6570
    """Build hooks env.
6571

6572
    This runs on master, primary and secondary nodes of the instance.
6573

6574
    """
6575
    instance = self._migrater.instance
6576
    source_node = instance.primary_node
6577
    target_node = self.op.target_node
6578
    env = {
6579
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6580
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6581
      "OLD_PRIMARY": source_node,
6582
      "NEW_PRIMARY": target_node,
6583
      }
6584

    
6585
    if instance.disk_template in constants.DTS_INT_MIRROR:
6586
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6587
      env["NEW_SECONDARY"] = source_node
6588
    else:
6589
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6590

    
6591
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6592

    
6593
    return env
6594

    
6595
  def BuildHooksNodes(self):
6596
    """Build hooks nodes.
6597

6598
    """
6599
    instance = self._migrater.instance
6600
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6601
    return (nl, nl + [instance.primary_node])
6602

    
6603

    
6604
class LUInstanceMigrate(LogicalUnit):
6605
  """Migrate an instance.
6606

6607
  This is migration without shutting down, compared to the failover,
6608
  which is done with shutdown.
6609

6610
  """
6611
  HPATH = "instance-migrate"
6612
  HTYPE = constants.HTYPE_INSTANCE
6613
  REQ_BGL = False
6614

    
6615
  def ExpandNames(self):
6616
    self._ExpandAndLockInstance()
6617

    
6618
    if self.op.target_node is not None:
6619
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6620

    
6621
    self.needed_locks[locking.LEVEL_NODE] = []
6622
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6623

    
6624
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6625
                                       cleanup=self.op.cleanup,
6626
                                       failover=False,
6627
                                       fallback=self.op.allow_failover)
6628
    self.tasklets = [self._migrater]
6629

    
6630
  def DeclareLocks(self, level):
6631
    if level == locking.LEVEL_NODE:
6632
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6633
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6634
        if self.op.target_node is None:
6635
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6636
        else:
6637
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6638
                                                   self.op.target_node]
6639
        del self.recalculate_locks[locking.LEVEL_NODE]
6640
      else:
6641
        self._LockInstancesNodes()
6642

    
6643
  def BuildHooksEnv(self):
6644
    """Build hooks env.
6645

6646
    This runs on master, primary and secondary nodes of the instance.
6647

6648
    """
6649
    instance = self._migrater.instance
6650
    source_node = instance.primary_node
6651
    target_node = self.op.target_node
6652
    env = _BuildInstanceHookEnvByObject(self, instance)
6653
    env.update({
6654
      "MIGRATE_LIVE": self._migrater.live,
6655
      "MIGRATE_CLEANUP": self.op.cleanup,
6656
      "OLD_PRIMARY": source_node,
6657
      "NEW_PRIMARY": target_node,
6658
      })
6659

    
6660
    if instance.disk_template in constants.DTS_INT_MIRROR:
6661
      env["OLD_SECONDARY"] = target_node
6662
      env["NEW_SECONDARY"] = source_node
6663
    else:
6664
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6665

    
6666
    return env
6667

    
6668
  def BuildHooksNodes(self):
6669
    """Build hooks nodes.
6670

6671
    """
6672
    instance = self._migrater.instance
6673
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6674
    return (nl, nl + [instance.primary_node])
6675

    
6676

    
6677
class LUInstanceMove(LogicalUnit):
6678
  """Move an instance by data-copying.
6679

6680
  """
6681
  HPATH = "instance-move"
6682
  HTYPE = constants.HTYPE_INSTANCE
6683
  REQ_BGL = False
6684

    
6685
  def ExpandNames(self):
6686
    self._ExpandAndLockInstance()
6687
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6688
    self.op.target_node = target_node
6689
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6690
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6691

    
6692
  def DeclareLocks(self, level):
6693
    if level == locking.LEVEL_NODE:
6694
      self._LockInstancesNodes(primary_only=True)
6695

    
6696
  def BuildHooksEnv(self):
6697
    """Build hooks env.
6698

6699
    This runs on master, primary and secondary nodes of the instance.
6700

6701
    """
6702
    env = {
6703
      "TARGET_NODE": self.op.target_node,
6704
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6705
      }
6706
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6707
    return env
6708

    
6709
  def BuildHooksNodes(self):
6710
    """Build hooks nodes.
6711

6712
    """
6713
    nl = [
6714
      self.cfg.GetMasterNode(),
6715
      self.instance.primary_node,
6716
      self.op.target_node,
6717
      ]
6718
    return (nl, nl)
6719

    
6720
  def CheckPrereq(self):
6721
    """Check prerequisites.
6722

6723
    This checks that the instance is in the cluster.
6724

6725
    """
6726
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6727
    assert self.instance is not None, \
6728
      "Cannot retrieve locked instance %s" % self.op.instance_name
6729

    
6730
    node = self.cfg.GetNodeInfo(self.op.target_node)
6731
    assert node is not None, \
6732
      "Cannot retrieve locked node %s" % self.op.target_node
6733

    
6734
    self.target_node = target_node = node.name
6735

    
6736
    if target_node == instance.primary_node:
6737
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6738
                                 (instance.name, target_node),
6739
                                 errors.ECODE_STATE)
6740

    
6741
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6742

    
6743
    for idx, dsk in enumerate(instance.disks):
6744
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6745
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6746
                                   " cannot copy" % idx, errors.ECODE_STATE)
6747

    
6748
    _CheckNodeOnline(self, target_node)
6749
    _CheckNodeNotDrained(self, target_node)
6750
    _CheckNodeVmCapable(self, target_node)
6751

    
6752
    if instance.admin_up:
6753
      # check memory requirements on the secondary node
6754
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6755
                           instance.name, bep[constants.BE_MEMORY],
6756
                           instance.hypervisor)
6757
    else:
6758
      self.LogInfo("Not checking memory on the secondary node as"
6759
                   " instance will not be started")
6760

    
6761
    # check bridge existance
6762
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6763

    
6764
  def Exec(self, feedback_fn):
6765
    """Move an instance.
6766

6767
    The move is done by shutting it down on its present node, copying
6768
    the data over (slow) and starting it on the new node.
6769

6770
    """
6771
    instance = self.instance
6772

    
6773
    source_node = instance.primary_node
6774
    target_node = self.target_node
6775

    
6776
    self.LogInfo("Shutting down instance %s on source node %s",
6777
                 instance.name, source_node)
6778

    
6779
    result = self.rpc.call_instance_shutdown(source_node, instance,
6780
                                             self.op.shutdown_timeout)
6781
    msg = result.fail_msg
6782
    if msg:
6783
      if self.op.ignore_consistency:
6784
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6785
                             " Proceeding anyway. Please make sure node"
6786
                             " %s is down. Error details: %s",
6787
                             instance.name, source_node, source_node, msg)
6788
      else:
6789
        raise errors.OpExecError("Could not shutdown instance %s on"
6790
                                 " node %s: %s" %
6791
                                 (instance.name, source_node, msg))
6792

    
6793
    # create the target disks
6794
    try:
6795
      _CreateDisks(self, instance, target_node=target_node)
6796
    except errors.OpExecError:
6797
      self.LogWarning("Device creation failed, reverting...")
6798
      try:
6799
        _RemoveDisks(self, instance, target_node=target_node)
6800
      finally:
6801
        self.cfg.ReleaseDRBDMinors(instance.name)
6802
        raise
6803

    
6804
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6805

    
6806
    errs = []
6807
    # activate, get path, copy the data over
6808
    for idx, disk in enumerate(instance.disks):
6809
      self.LogInfo("Copying data for disk %d", idx)
6810
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6811
                                               instance.name, True, idx)
6812
      if result.fail_msg:
6813
        self.LogWarning("Can't assemble newly created disk %d: %s",
6814
                        idx, result.fail_msg)
6815
        errs.append(result.fail_msg)
6816
        break
6817
      dev_path = result.payload
6818
      result = self.rpc.call_blockdev_export(source_node, disk,
6819
                                             target_node, dev_path,
6820
                                             cluster_name)
6821
      if result.fail_msg:
6822
        self.LogWarning("Can't copy data over for disk %d: %s",
6823
                        idx, result.fail_msg)
6824
        errs.append(result.fail_msg)
6825
        break
6826

    
6827
    if errs:
6828
      self.LogWarning("Some disks failed to copy, aborting")
6829
      try:
6830
        _RemoveDisks(self, instance, target_node=target_node)
6831
      finally:
6832
        self.cfg.ReleaseDRBDMinors(instance.name)
6833
        raise errors.OpExecError("Errors during disk copy: %s" %
6834
                                 (",".join(errs),))
6835

    
6836
    instance.primary_node = target_node
6837
    self.cfg.Update(instance, feedback_fn)
6838

    
6839
    self.LogInfo("Removing the disks on the original node")
6840
    _RemoveDisks(self, instance, target_node=source_node)
6841

    
6842
    # Only start the instance if it's marked as up
6843
    if instance.admin_up:
6844
      self.LogInfo("Starting instance %s on node %s",
6845
                   instance.name, target_node)
6846

    
6847
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6848
                                           ignore_secondaries=True)
6849
      if not disks_ok:
6850
        _ShutdownInstanceDisks(self, instance)
6851
        raise errors.OpExecError("Can't activate the instance's disks")
6852

    
6853
      result = self.rpc.call_instance_start(target_node, instance,
6854
                                            None, None, False)
6855
      msg = result.fail_msg
6856
      if msg:
6857
        _ShutdownInstanceDisks(self, instance)
6858
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6859
                                 (instance.name, target_node, msg))
6860

    
6861

    
6862
class LUNodeMigrate(LogicalUnit):
6863
  """Migrate all instances from a node.
6864

6865
  """
6866
  HPATH = "node-migrate"
6867
  HTYPE = constants.HTYPE_NODE
6868
  REQ_BGL = False
6869

    
6870
  def CheckArguments(self):
6871
    pass
6872

    
6873
  def ExpandNames(self):
6874
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6875

    
6876
    self.share_locks = _ShareAll()
6877
    self.needed_locks = {
6878
      locking.LEVEL_NODE: [self.op.node_name],
6879
      }
6880

    
6881
  def BuildHooksEnv(self):
6882
    """Build hooks env.
6883

6884
    This runs on the master, the primary and all the secondaries.
6885

6886
    """
6887
    return {
6888
      "NODE_NAME": self.op.node_name,
6889
      }
6890

    
6891
  def BuildHooksNodes(self):
6892
    """Build hooks nodes.
6893

6894
    """
6895
    nl = [self.cfg.GetMasterNode()]
6896
    return (nl, nl)
6897

    
6898
  def CheckPrereq(self):
6899
    pass
6900

    
6901
  def Exec(self, feedback_fn):
6902
    # Prepare jobs for migration instances
6903
    jobs = [
6904
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6905
                                 mode=self.op.mode,
6906
                                 live=self.op.live,
6907
                                 iallocator=self.op.iallocator,
6908
                                 target_node=self.op.target_node)]
6909
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6910
      ]
6911

    
6912
    # TODO: Run iallocator in this opcode and pass correct placement options to
6913
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6914
    # running the iallocator and the actual migration, a good consistency model
6915
    # will have to be found.
6916

    
6917
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6918
            frozenset([self.op.node_name]))
6919

    
6920
    return ResultWithJobs(jobs)
6921

    
6922

    
6923
class TLMigrateInstance(Tasklet):
6924
  """Tasklet class for instance migration.
6925

6926
  @type live: boolean
6927
  @ivar live: whether the migration will be done live or non-live;
6928
      this variable is initialized only after CheckPrereq has run
6929
  @type cleanup: boolean
6930
  @ivar cleanup: Whether we clean up from a failed migration
6931
  @type iallocator: string
6932
  @ivar iallocator: The iallocator used to determine target_node
6933
  @type target_node: string
6934
  @ivar target_node: If given, the target_node to reallocate the instance to
6935
  @type failover: boolean
6936
  @ivar failover: Whether operation results in failover or migration
6937
  @type fallback: boolean
6938
  @ivar fallback: Whether fallback to failover is allowed if migration not
6939
                  possible
6940
  @type ignore_consistency: boolean
6941
  @ivar ignore_consistency: Whether we should ignore consistency between source
6942
                            and target node
6943
  @type shutdown_timeout: int
6944
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
6945

6946
  """
6947
  def __init__(self, lu, instance_name, cleanup=False,
6948
               failover=False, fallback=False,
6949
               ignore_consistency=False,
6950
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6951
    """Initializes this class.
6952

6953
    """
6954
    Tasklet.__init__(self, lu)
6955

    
6956
    # Parameters
6957
    self.instance_name = instance_name
6958
    self.cleanup = cleanup
6959
    self.live = False # will be overridden later
6960
    self.failover = failover
6961
    self.fallback = fallback
6962
    self.ignore_consistency = ignore_consistency
6963
    self.shutdown_timeout = shutdown_timeout
6964

    
6965
  def CheckPrereq(self):
6966
    """Check prerequisites.
6967

6968
    This checks that the instance is in the cluster.
6969

6970
    """
6971
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6972
    instance = self.cfg.GetInstanceInfo(instance_name)
6973
    assert instance is not None
6974
    self.instance = instance
6975

    
6976
    if (not self.cleanup and not instance.admin_up and not self.failover and
6977
        self.fallback):
6978
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6979
                      " to failover")
6980
      self.failover = True
6981

    
6982
    if instance.disk_template not in constants.DTS_MIRRORED:
6983
      if self.failover:
6984
        text = "failovers"
6985
      else:
6986
        text = "migrations"
6987
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6988
                                 " %s" % (instance.disk_template, text),
6989
                                 errors.ECODE_STATE)
6990

    
6991
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6992
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6993

    
6994
      if self.lu.op.iallocator:
6995
        self._RunAllocator()
6996
      else:
6997
        # We set self.target_node as it is required by
6998
        # BuildHooksEnv
6999
        self.target_node = self.lu.op.target_node
7000

    
7001
      # self.target_node is already populated, either directly or by the
7002
      # iallocator run
7003
      target_node = self.target_node
7004
      if self.target_node == instance.primary_node:
7005
        raise errors.OpPrereqError("Cannot migrate instance %s"
7006
                                   " to its primary (%s)" %
7007
                                   (instance.name, instance.primary_node))
7008

    
7009
      if len(self.lu.tasklets) == 1:
7010
        # It is safe to release locks only when we're the only tasklet
7011
        # in the LU
7012
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7013
                      keep=[instance.primary_node, self.target_node])
7014

    
7015
    else:
7016
      secondary_nodes = instance.secondary_nodes
7017
      if not secondary_nodes:
7018
        raise errors.ConfigurationError("No secondary node but using"
7019
                                        " %s disk template" %
7020
                                        instance.disk_template)
7021
      target_node = secondary_nodes[0]
7022
      if self.lu.op.iallocator or (self.lu.op.target_node and
7023
                                   self.lu.op.target_node != target_node):
7024
        if self.failover:
7025
          text = "failed over"
7026
        else:
7027
          text = "migrated"
7028
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7029
                                   " be %s to arbitrary nodes"
7030
                                   " (neither an iallocator nor a target"
7031
                                   " node can be passed)" %
7032
                                   (instance.disk_template, text),
7033
                                   errors.ECODE_INVAL)
7034

    
7035
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7036

    
7037
    # check memory requirements on the secondary node
7038
    if not self.failover or instance.admin_up:
7039
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7040
                           instance.name, i_be[constants.BE_MEMORY],
7041
                           instance.hypervisor)
7042
    else:
7043
      self.lu.LogInfo("Not checking memory on the secondary node as"
7044
                      " instance will not be started")
7045

    
7046
    # check bridge existence
7047
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7048

    
7049
    if not self.cleanup:
7050
      _CheckNodeNotDrained(self.lu, target_node)
7051
      if not self.failover:
7052
        result = self.rpc.call_instance_migratable(instance.primary_node,
7053
                                                   instance)
7054
        if result.fail_msg and self.fallback:
7055
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7056
                          " failover")
7057
          self.failover = True
7058
        else:
7059
          result.Raise("Can't migrate, please use failover",
7060
                       prereq=True, ecode=errors.ECODE_STATE)
7061

    
7062
    assert not (self.failover and self.cleanup)
7063

    
7064
    if not self.failover:
7065
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7066
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7067
                                   " parameters are accepted",
7068
                                   errors.ECODE_INVAL)
7069
      if self.lu.op.live is not None:
7070
        if self.lu.op.live:
7071
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7072
        else:
7073
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7074
        # reset the 'live' parameter to None so that repeated
7075
        # invocations of CheckPrereq do not raise an exception
7076
        self.lu.op.live = None
7077
      elif self.lu.op.mode is None:
7078
        # read the default value from the hypervisor
7079
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7080
                                                skip_globals=False)
7081
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7082

    
7083
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7084
    else:
7085
      # Failover is never live
7086
      self.live = False
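    # Illustrative summary of the mode resolution above (descriptive comment,
    # not part of the original code): a request with live=False is turned into
    # mode=HT_MIGRATION_NONLIVE and self.live=False; with neither 'live' nor
    # 'mode' given, the hypervisor's HV_MIGRATION_MODE parameter decides.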
7087

    
7088
  def _RunAllocator(self):
7089
    """Run the allocator based on input opcode.
7090

7091
    """
7092
    ial = IAllocator(self.cfg, self.rpc,
7093
                     mode=constants.IALLOCATOR_MODE_RELOC,
7094
                     name=self.instance_name,
7095
                     # TODO See why hail breaks with a single node below
7096
                     relocate_from=[self.instance.primary_node,
7097
                                    self.instance.primary_node],
7098
                     )
7099

    
7100
    ial.Run(self.lu.op.iallocator)
7101

    
7102
    if not ial.success:
7103
      raise errors.OpPrereqError("Can't compute nodes using"
7104
                                 " iallocator '%s': %s" %
7105
                                 (self.lu.op.iallocator, ial.info),
7106
                                 errors.ECODE_NORES)
7107
    if len(ial.result) != ial.required_nodes:
7108
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7109
                                 " of nodes (%s), required %s" %
7110
                                 (self.lu.op.iallocator, len(ial.result),
7111
                                  ial.required_nodes), errors.ECODE_FAULT)
7112
    self.target_node = ial.result[0]
7113
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7114
                 self.instance_name, self.lu.op.iallocator,
7115
                 utils.CommaJoin(ial.result))
7116

    
7117
  def _WaitUntilSync(self):
7118
    """Poll with custom rpc for disk sync.
7119

7120
    This uses our own step-based rpc call.
7121

7122
    """
7123
    self.feedback_fn("* wait until resync is done")
7124
    all_done = False
7125
    while not all_done:
7126
      all_done = True
7127
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7128
                                            self.nodes_ip,
7129
                                            self.instance.disks)
7130
      min_percent = 100
7131
      for node, nres in result.items():
7132
        nres.Raise("Cannot resync disks on node %s" % node)
7133
        node_done, node_percent = nres.payload
7134
        all_done = all_done and node_done
7135
        if node_percent is not None:
7136
          min_percent = min(min_percent, node_percent)
7137
      if not all_done:
7138
        if min_percent < 100:
7139
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7140
        time.sleep(2)
7141

    
7142
  def _EnsureSecondary(self, node):
7143
    """Demote a node to secondary.
7144

7145
    """
7146
    self.feedback_fn("* switching node %s to secondary mode" % node)
7147

    
7148
    for dev in self.instance.disks:
7149
      self.cfg.SetDiskID(dev, node)
7150

    
7151
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7152
                                          self.instance.disks)
7153
    result.Raise("Cannot change disk to secondary on node %s" % node)
7154

    
7155
  def _GoStandalone(self):
7156
    """Disconnect from the network.
7157

7158
    """
7159
    self.feedback_fn("* changing into standalone mode")
7160
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7161
                                               self.instance.disks)
7162
    for node, nres in result.items():
7163
      nres.Raise("Cannot disconnect disks node %s" % node)
7164

    
7165
  def _GoReconnect(self, multimaster):
7166
    """Reconnect to the network.
7167

7168
    """
7169
    if multimaster:
7170
      msg = "dual-master"
7171
    else:
7172
      msg = "single-master"
7173
    self.feedback_fn("* changing disks into %s mode" % msg)
7174
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7175
                                           self.instance.disks,
7176
                                           self.instance.name, multimaster)
7177
    for node, nres in result.items():
7178
      nres.Raise("Cannot change disks config on node %s" % node)
7179

    
7180
  def _ExecCleanup(self):
7181
    """Try to cleanup after a failed migration.
7182

7183
    The cleanup is done by:
7184
      - check that the instance is running only on one node
7185
        (and update the config if needed)
7186
      - change disks on its secondary node to secondary
7187
      - wait until disks are fully synchronized
7188
      - disconnect from the network
7189
      - change disks into single-master mode
7190
      - wait again until disks are fully synchronized
7191

7192
    """
7193
    instance = self.instance
7194
    target_node = self.target_node
7195
    source_node = self.source_node
7196

    
7197
    # check running on only one node
7198
    self.feedback_fn("* checking where the instance actually runs"
7199
                     " (if this hangs, the hypervisor might be in"
7200
                     " a bad state)")
7201
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7202
    for node, result in ins_l.items():
7203
      result.Raise("Can't contact node %s" % node)
7204

    
7205
    runningon_source = instance.name in ins_l[source_node].payload
7206
    runningon_target = instance.name in ins_l[target_node].payload
7207

    
7208
    if runningon_source and runningon_target:
7209
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7210
                               " or the hypervisor is confused; you will have"
7211
                               " to ensure manually that it runs only on one"
7212
                               " and restart this operation")
7213

    
7214
    if not (runningon_source or runningon_target):
7215
      raise errors.OpExecError("Instance does not seem to be running at all;"
7216
                               " in this case it's safer to repair by"
7217
                               " running 'gnt-instance stop' to ensure disk"
7218
                               " shutdown, and then restarting it")
7219

    
7220
    if runningon_target:
7221
      # the migration has actually succeeded, we need to update the config
7222
      self.feedback_fn("* instance running on secondary node (%s),"
7223
                       " updating config" % target_node)
7224
      instance.primary_node = target_node
7225
      self.cfg.Update(instance, self.feedback_fn)
7226
      demoted_node = source_node
7227
    else:
7228
      self.feedback_fn("* instance confirmed to be running on its"
7229
                       " primary node (%s)" % source_node)
7230
      demoted_node = target_node
7231

    
7232
    if instance.disk_template in constants.DTS_INT_MIRROR:
7233
      self._EnsureSecondary(demoted_node)
7234
      try:
7235
        self._WaitUntilSync()
7236
      except errors.OpExecError:
7237
        # we ignore errors here, since if the device is standalone, it
7238
        # won't be able to sync
7239
        pass
7240
      self._GoStandalone()
7241
      self._GoReconnect(False)
7242
      self._WaitUntilSync()
7243

    
7244
    self.feedback_fn("* done")
7245

    
7246
  def _RevertDiskStatus(self):
7247
    """Try to revert the disk status after a failed migration.
7248

7249
    """
7250
    target_node = self.target_node
7251
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7252
      return
7253

    
7254
    try:
7255
      self._EnsureSecondary(target_node)
7256
      self._GoStandalone()
7257
      self._GoReconnect(False)
7258
      self._WaitUntilSync()
7259
    except errors.OpExecError, err:
7260
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7261
                         " please try to recover the instance manually;"
7262
                         " error '%s'" % str(err))
7263

    
7264
  def _AbortMigration(self):
7265
    """Call the hypervisor code to abort a started migration.
7266

7267
    """
7268
    instance = self.instance
7269
    target_node = self.target_node
7270
    migration_info = self.migration_info
7271

    
7272
    abort_result = self.rpc.call_finalize_migration(target_node,
7273
                                                    instance,
7274
                                                    migration_info,
7275
                                                    False)
7276
    abort_msg = abort_result.fail_msg
7277
    if abort_msg:
7278
      logging.error("Aborting migration failed on target node %s: %s",
7279
                    target_node, abort_msg)
7280
      # Don't raise an exception here, as we still have to try to revert the
7281
      # disk status, even if this step failed.
7282

    
7283
  def _ExecMigration(self):
7284
    """Migrate an instance.
7285

7286
    The migrate is done by:
7287
      - change the disks into dual-master mode
7288
      - wait until disks are fully synchronized again
7289
      - migrate the instance
7290
      - change disks on the new secondary node (the old primary) to secondary
7291
      - wait until disks are fully synchronized
7292
      - change disks into single-master mode
7293

7294
    """
7295
    instance = self.instance
7296
    target_node = self.target_node
7297
    source_node = self.source_node
7298

    
7299
    self.feedback_fn("* checking disk consistency between source and target")
7300
    for dev in instance.disks:
7301
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7302
        raise errors.OpExecError("Disk %s is degraded or not fully"
7303
                                 " synchronized on target node,"
7304
                                 " aborting migration" % dev.iv_name)
7305

    
7306
    # First get the migration information from the remote node
7307
    result = self.rpc.call_migration_info(source_node, instance)
7308
    msg = result.fail_msg
7309
    if msg:
7310
      log_err = ("Failed fetching source migration information from %s: %s" %
7311
                 (source_node, msg))
7312
      logging.error(log_err)
7313
      raise errors.OpExecError(log_err)
7314

    
7315
    self.migration_info = migration_info = result.payload
7316

    
7317
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7318
      # Then switch the disks to master/master mode
7319
      self._EnsureSecondary(target_node)
7320
      self._GoStandalone()
7321
      self._GoReconnect(True)
7322
      self._WaitUntilSync()
7323

    
7324
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7325
    result = self.rpc.call_accept_instance(target_node,
7326
                                           instance,
7327
                                           migration_info,
7328
                                           self.nodes_ip[target_node])
7329

    
7330
    msg = result.fail_msg
7331
    if msg:
7332
      logging.error("Instance pre-migration failed, trying to revert"
7333
                    " disk status: %s", msg)
7334
      self.feedback_fn("Pre-migration failed, aborting")
7335
      self._AbortMigration()
7336
      self._RevertDiskStatus()
7337
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7338
                               (instance.name, msg))
7339

    
7340
    self.feedback_fn("* migrating instance to %s" % target_node)
7341
    result = self.rpc.call_instance_migrate(source_node, instance,
7342
                                            self.nodes_ip[target_node],
7343
                                            self.live)
7344
    msg = result.fail_msg
7345
    if msg:
7346
      logging.error("Instance migration failed, trying to revert"
7347
                    " disk status: %s", msg)
7348
      self.feedback_fn("Migration failed, aborting")
7349
      self._AbortMigration()
7350
      self._RevertDiskStatus()
7351
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7352
                               (instance.name, msg))
7353

    
7354
    instance.primary_node = target_node
7355
    # distribute new instance config to the other nodes
7356
    self.cfg.Update(instance, self.feedback_fn)
7357

    
7358
    result = self.rpc.call_finalize_migration(target_node,
7359
                                              instance,
7360
                                              migration_info,
7361
                                              True)
7362
    msg = result.fail_msg
7363
    if msg:
7364
      logging.error("Instance migration succeeded, but finalization failed:"
7365
                    " %s", msg)
7366
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7367
                               msg)
7368

    
7369
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7370
      self._EnsureSecondary(source_node)
7371
      self._WaitUntilSync()
7372
      self._GoStandalone()
7373
      self._GoReconnect(False)
7374
      self._WaitUntilSync()
7375

    
7376
    self.feedback_fn("* done")
7377

    
7378
  def _ExecFailover(self):
7379
    """Failover an instance.
7380

7381
    The failover is done by shutting it down on its present node and
7382
    starting it on the secondary.
7383

7384
    """
7385
    instance = self.instance
7386
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7387

    
7388
    source_node = instance.primary_node
7389
    target_node = self.target_node
7390

    
7391
    if instance.admin_up:
7392
      self.feedback_fn("* checking disk consistency between source and target")
7393
      for dev in instance.disks:
7394
        # for drbd, these are drbd over lvm
7395
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7396
          if primary_node.offline:
7397
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7398
                             " target node %s" %
7399
                             (primary_node.name, dev.iv_name, target_node))
7400
          elif not self.ignore_consistency:
7401
            raise errors.OpExecError("Disk %s is degraded on target node,"
7402
                                     " aborting failover" % dev.iv_name)
7403
    else:
7404
      self.feedback_fn("* not checking disk consistency as instance is not"
7405
                       " running")
7406

    
7407
    self.feedback_fn("* shutting down instance on source node")
7408
    logging.info("Shutting down instance %s on node %s",
7409
                 instance.name, source_node)
7410

    
7411
    result = self.rpc.call_instance_shutdown(source_node, instance,
7412
                                             self.shutdown_timeout)
7413
    msg = result.fail_msg
7414
    if msg:
7415
      if self.ignore_consistency or primary_node.offline:
7416
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7417
                           " proceeding anyway; please make sure node"
7418
                           " %s is down; error details: %s",
7419
                           instance.name, source_node, source_node, msg)
7420
      else:
7421
        raise errors.OpExecError("Could not shutdown instance %s on"
7422
                                 " node %s: %s" %
7423
                                 (instance.name, source_node, msg))
7424

    
7425
    self.feedback_fn("* deactivating the instance's disks on source node")
7426
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7427
      raise errors.OpExecError("Can't shut down the instance's disks")
7428

    
7429
    instance.primary_node = target_node
7430
    # distribute new instance config to the other nodes
7431
    self.cfg.Update(instance, self.feedback_fn)
7432

    
7433
    # Only start the instance if it's marked as up
7434
    if instance.admin_up:
7435
      self.feedback_fn("* activating the instance's disks on target node %s" %
7436
                       target_node)
7437
      logging.info("Starting instance %s on node %s",
7438
                   instance.name, target_node)
7439

    
7440
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7441
                                           ignore_secondaries=True)
7442
      if not disks_ok:
7443
        _ShutdownInstanceDisks(self.lu, instance)
7444
        raise errors.OpExecError("Can't activate the instance's disks")
7445

    
7446
      self.feedback_fn("* starting the instance on the target node %s" %
7447
                       target_node)
7448
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7449
                                            False)
7450
      msg = result.fail_msg
7451
      if msg:
7452
        _ShutdownInstanceDisks(self.lu, instance)
7453
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7454
                                 (instance.name, target_node, msg))
7455

    
7456
  def Exec(self, feedback_fn):
7457
    """Perform the migration.
7458

7459
    """
7460
    self.feedback_fn = feedback_fn
7461
    self.source_node = self.instance.primary_node
7462

    
7463
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7464
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7465
      self.target_node = self.instance.secondary_nodes[0]
7466
      # Otherwise self.target_node has been populated either
7467
      # directly, or through an iallocator.
7468

    
7469
    self.all_nodes = [self.source_node, self.target_node]
7470
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7471
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7472

    
7473
    if self.failover:
7474
      feedback_fn("Failover instance %s" % self.instance.name)
7475
      self._ExecFailover()
7476
    else:
7477
      feedback_fn("Migrating instance %s" % self.instance.name)
7478

    
7479
      if self.cleanup:
7480
        return self._ExecCleanup()
7481
      else:
7482
        return self._ExecMigration()
7483

    
7484

    
7485
def _CreateBlockDev(lu, node, instance, device, force_create,
7486
                    info, force_open):
7487
  """Create a tree of block devices on a given node.
7488

7489
  If this device type has to be created on secondaries, create it and
7490
  all its children.
7491

7492
  If not, just recurse to children keeping the same 'force' value.
7493

7494
  @param lu: the lu on whose behalf we execute
7495
  @param node: the node on which to create the device
7496
  @type instance: L{objects.Instance}
7497
  @param instance: the instance which owns the device
7498
  @type device: L{objects.Disk}
7499
  @param device: the device to create
7500
  @type force_create: boolean
7501
  @param force_create: whether to force creation of this device; this
7502
      will be changed to True whenever we find a device which has
7503
      the CreateOnSecondary() attribute
7504
  @param info: the extra 'metadata' we should attach to the device
7505
      (this will be represented as a LVM tag)
7506
  @type force_open: boolean
7507
  @param force_open: this parameter will be passed to the
7508
      L{backend.BlockdevCreate} function where it specifies
7509
      whether we run on primary or not, and it affects both
7510
      the child assembly and the device's own Open() execution
7511

7512
  """
7513
  if device.CreateOnSecondary():
7514
    force_create = True
7515

    
7516
  if device.children:
7517
    for child in device.children:
7518
      _CreateBlockDev(lu, node, instance, child, force_create,
7519
                      info, force_open)
7520

    
7521
  if not force_create:
7522
    return
7523

    
7524
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
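  # Sketch of the recursion above (comment added for clarity): children are
  # created depth-first, and force_create is promoted to True as soon as a
  # device reports CreateOnSecondary(), so e.g. the LVs backing a DRBD8 disk
  # are created on both nodes even if the caller only forced the primary.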
7525

    
7526

    
7527
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7528
  """Create a single block device on a given node.
7529

7530
  This will not recurse over children of the device, so they must be
7531
  created in advance.
7532

7533
  @param lu: the lu on whose behalf we execute
7534
  @param node: the node on which to create the device
7535
  @type instance: L{objects.Instance}
7536
  @param instance: the instance which owns the device
7537
  @type device: L{objects.Disk}
7538
  @param device: the device to create
7539
  @param info: the extra 'metadata' we should attach to the device
7540
      (this will be represented as a LVM tag)
7541
  @type force_open: boolean
7542
  @param force_open: this parameter will be passed to the
7543
      L{backend.BlockdevCreate} function where it specifies
7544
      whether we run on primary or not, and it affects both
7545
      the child assembly and the device's own Open() execution
7546

7547
  """
7548
  lu.cfg.SetDiskID(device, node)
7549
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7550
                                       instance.name, force_open, info)
7551
  result.Raise("Can't create block device %s on"
7552
               " node %s for instance %s" % (device, node, instance.name))
7553
  if device.physical_id is None:
7554
    device.physical_id = result.payload
7555

    
7556

    
7557
def _GenerateUniqueNames(lu, exts):
7558
  """Generate a suitable LV name.
7559

7560
  This will generate a logical volume name for the given instance.
7561

7562
  """
7563
  results = []
7564
  for val in exts:
7565
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7566
    results.append("%s%s" % (new_id, val))
7567
  return results
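  # Illustrative result shape (the UUIDs are generated at runtime, shown here
  # only as placeholders):
  #   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
  #   -> ["<uuid1>.disk0_data", "<uuid2>.disk0_meta"]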
7568

    
7569

    
7570
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7571
                         iv_name, p_minor, s_minor):
7572
  """Generate a drbd8 device complete with its children.
7573

7574
  """
7575
  assert len(vgnames) == len(names) == 2
7576
  port = lu.cfg.AllocatePort()
7577
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7578
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7579
                          logical_id=(vgnames[0], names[0]))
7580
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7581
                          logical_id=(vgnames[1], names[1]))
7582
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7583
                          logical_id=(primary, secondary, port,
7584
                                      p_minor, s_minor,
7585
                                      shared_secret),
7586
                          children=[dev_data, dev_meta],
7587
                          iv_name=iv_name)
7588
  return drbd_dev
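  # Resulting device tree (descriptive sketch): an LD_DRBD8 disk identified by
  # (primary, secondary, port, p_minor, s_minor, shared_secret) with two LV
  # children -- a data LV of the requested size and a fixed 128 MB meta LV.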
7589

    
7590

    
7591
def _GenerateDiskTemplate(lu, template_name,
7592
                          instance_name, primary_node,
7593
                          secondary_nodes, disk_info,
7594
                          file_storage_dir, file_driver,
7595
                          base_index, feedback_fn):
7596
  """Generate the entire disk layout for a given template type.
7597

7598
  """
7599
  #TODO: compute space requirements
7600

    
7601
  vgname = lu.cfg.GetVGName()
7602
  disk_count = len(disk_info)
7603
  disks = []
7604
  if template_name == constants.DT_DISKLESS:
7605
    pass
7606
  elif template_name == constants.DT_PLAIN:
7607
    if len(secondary_nodes) != 0:
7608
      raise errors.ProgrammerError("Wrong template configuration")
7609

    
7610
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7611
                                      for i in range(disk_count)])
7612
    for idx, disk in enumerate(disk_info):
7613
      disk_index = idx + base_index
7614
      vg = disk.get(constants.IDISK_VG, vgname)
7615
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7616
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7617
                              size=disk[constants.IDISK_SIZE],
7618
                              logical_id=(vg, names[idx]),
7619
                              iv_name="disk/%d" % disk_index,
7620
                              mode=disk[constants.IDISK_MODE])
7621
      disks.append(disk_dev)
7622
  elif template_name == constants.DT_DRBD8:
7623
    if len(secondary_nodes) != 1:
7624
      raise errors.ProgrammerError("Wrong template configuration")
7625
    remote_node = secondary_nodes[0]
7626
    minors = lu.cfg.AllocateDRBDMinor(
7627
      [primary_node, remote_node] * len(disk_info), instance_name)
7628

    
7629
    names = []
7630
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7631
                                               for i in range(disk_count)]):
7632
      names.append(lv_prefix + "_data")
7633
      names.append(lv_prefix + "_meta")
7634
    for idx, disk in enumerate(disk_info):
7635
      disk_index = idx + base_index
7636
      data_vg = disk.get(constants.IDISK_VG, vgname)
7637
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7638
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7639
                                      disk[constants.IDISK_SIZE],
7640
                                      [data_vg, meta_vg],
7641
                                      names[idx * 2:idx * 2 + 2],
7642
                                      "disk/%d" % disk_index,
7643
                                      minors[idx * 2], minors[idx * 2 + 1])
7644
      disk_dev.mode = disk[constants.IDISK_MODE]
7645
      disks.append(disk_dev)
7646
  elif template_name == constants.DT_FILE:
7647
    if len(secondary_nodes) != 0:
7648
      raise errors.ProgrammerError("Wrong template configuration")
7649

    
7650
    opcodes.RequireFileStorage()
7651

    
7652
    for idx, disk in enumerate(disk_info):
7653
      disk_index = idx + base_index
7654
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7655
                              size=disk[constants.IDISK_SIZE],
7656
                              iv_name="disk/%d" % disk_index,
7657
                              logical_id=(file_driver,
7658
                                          "%s/disk%d" % (file_storage_dir,
7659
                                                         disk_index)),
7660
                              mode=disk[constants.IDISK_MODE])
7661
      disks.append(disk_dev)
7662
  elif template_name == constants.DT_SHARED_FILE:
7663
    if len(secondary_nodes) != 0:
7664
      raise errors.ProgrammerError("Wrong template configuration")
7665

    
7666
    opcodes.RequireSharedFileStorage()
7667

    
7668
    for idx, disk in enumerate(disk_info):
7669
      disk_index = idx + base_index
7670
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7671
                              size=disk[constants.IDISK_SIZE],
7672
                              iv_name="disk/%d" % disk_index,
7673
                              logical_id=(file_driver,
7674
                                          "%s/disk%d" % (file_storage_dir,
7675
                                                         disk_index)),
7676
                              mode=disk[constants.IDISK_MODE])
7677
      disks.append(disk_dev)
7678
  elif template_name == constants.DT_BLOCK:
7679
    if len(secondary_nodes) != 0:
7680
      raise errors.ProgrammerError("Wrong template configuration")
7681

    
7682
    for idx, disk in enumerate(disk_info):
7683
      disk_index = idx + base_index
7684
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7685
                              size=disk[constants.IDISK_SIZE],
7686
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7687
                                          disk[constants.IDISK_ADOPT]),
7688
                              iv_name="disk/%d" % disk_index,
7689
                              mode=disk[constants.IDISK_MODE])
7690
      disks.append(disk_dev)
7691

    
7692
  else:
7693
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7694
  return disks
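  # Illustrative example (not part of the original code): for DT_PLAIN with a
  # single {IDISK_SIZE: 1024} disk this returns one LD_LV Disk object with
  # logical_id=(<vgname>, "<uuid>.disk0") and iv_name="disk/0"; for DT_DRBD8
  # each such LV pair is wrapped in an LD_DRBD8 device (_GenerateDRBD8Branch).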
7695

    
7696

    
7697
def _GetInstanceInfoText(instance):
7698
  """Compute that text that should be added to the disk's metadata.
7699

7700
  """
7701
  return "originstname+%s" % instance.name
7702

    
7703

    
7704
def _CalcEta(time_taken, written, total_size):
7705
  """Calculates the ETA based on size written and total size.
7706

7707
  @param time_taken: The time taken so far
7708
  @param written: amount written so far
7709
  @param total_size: The total size of data to be written
7710
  @return: The remaining time in seconds
7711

7712
  """
7713
  avg_time = time_taken / float(written)
7714
  return (total_size - written) * avg_time
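  # Worked example (illustrative): with 512 of 2048 units written in 30
  # seconds, the ETA is (2048 - 512) * (30 / 512.0) = 90 seconds.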
7715

    
7716

    
7717
def _WipeDisks(lu, instance):
7718
  """Wipes instance disks.
7719

7720
  @type lu: L{LogicalUnit}
7721
  @param lu: the logical unit on whose behalf we execute
7722
  @type instance: L{objects.Instance}
7723
  @param instance: the instance whose disks we should create
7724
  @return: the success of the wipe
7725

7726
  """
7727
  node = instance.primary_node
7728

    
7729
  for device in instance.disks:
7730
    lu.cfg.SetDiskID(device, node)
7731

    
7732
  logging.info("Pause sync of instance %s disks", instance.name)
7733
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7734

    
7735
  for idx, success in enumerate(result.payload):
7736
    if not success:
7737
      logging.warn("pause-sync of instance %s for disks %d failed",
7738
                   instance.name, idx)
7739

    
7740
  try:
7741
    for idx, device in enumerate(instance.disks):
7742
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7743
      # MAX_WIPE_CHUNK at max
7744
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7745
                            constants.MIN_WIPE_CHUNK_PERCENT)
7746
      # we _must_ make this an int, otherwise rounding errors will
7747
      # occur
7748
      wipe_chunk_size = int(wipe_chunk_size)
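      # Illustrative example (the constant values are assumptions, see
      # constants.py): with MIN_WIPE_CHUNK_PERCENT = 10 and
      # MAX_WIPE_CHUNK = 1024, a 20480 MiB disk is wiped in chunks of
      # min(1024, 20480 / 100.0 * 10) = 1024 MiB.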
7749

    
7750
      lu.LogInfo("* Wiping disk %d", idx)
7751
      logging.info("Wiping disk %d for instance %s, node %s using"
7752
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7753

    
7754
      offset = 0
7755
      size = device.size
7756
      last_output = 0
7757
      start_time = time.time()
7758

    
7759
      while offset < size:
7760
        wipe_size = min(wipe_chunk_size, size - offset)
7761
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7762
                      idx, offset, wipe_size)
7763
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7764
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7765
                     (idx, offset, wipe_size))
7766
        now = time.time()
7767
        offset += wipe_size
7768
        if now - last_output >= 60:
7769
          eta = _CalcEta(now - start_time, offset, size)
7770
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7771
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7772
          last_output = now
7773
  finally:
7774
    logging.info("Resume sync of instance %s disks", instance.name)
7775

    
7776
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7777

    
7778
    for idx, success in enumerate(result.payload):
7779
      if not success:
7780
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7781
                      " look at the status and troubleshoot the issue", idx)
7782
        logging.warn("resume-sync of instance %s for disks %d failed",
7783
                     instance.name, idx)
7784

    
7785

    
7786
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7787
  """Create all disks for an instance.
7788

7789
  This abstracts away some work from AddInstance.
7790

7791
  @type lu: L{LogicalUnit}
7792
  @param lu: the logical unit on whose behalf we execute
7793
  @type instance: L{objects.Instance}
7794
  @param instance: the instance whose disks we should create
7795
  @type to_skip: list
7796
  @param to_skip: list of indices to skip
7797
  @type target_node: string
7798
  @param target_node: if passed, overrides the target node for creation
7799
  @rtype: boolean
7800
  @return: the success of the creation
7801

7802
  """
7803
  info = _GetInstanceInfoText(instance)
7804
  if target_node is None:
7805
    pnode = instance.primary_node
7806
    all_nodes = instance.all_nodes
7807
  else:
7808
    pnode = target_node
7809
    all_nodes = [pnode]
7810

    
7811
  if instance.disk_template in constants.DTS_FILEBASED:
7812
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7813
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7814

    
7815
    result.Raise("Failed to create directory '%s' on"
7816
                 " node %s" % (file_storage_dir, pnode))
7817

    
7818
  # Note: this needs to be kept in sync with adding of disks in
7819
  # LUInstanceSetParams
7820
  for idx, device in enumerate(instance.disks):
7821
    if to_skip and idx in to_skip:
7822
      continue
7823
    logging.info("Creating volume %s for instance %s",
7824
                 device.iv_name, instance.name)
7825
    #HARDCODE
7826
    for node in all_nodes:
7827
      f_create = node == pnode
7828
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7829

    
7830

    
7831
def _RemoveDisks(lu, instance, target_node=None):
7832
  """Remove all disks for an instance.
7833

7834
  This abstracts away some work from `AddInstance()` and
7835
  `RemoveInstance()`. Note that in case some of the devices couldn't
7836
  be removed, the removal will continue with the other ones (compare
7837
  with `_CreateDisks()`).
7838

7839
  @type lu: L{LogicalUnit}
7840
  @param lu: the logical unit on whose behalf we execute
7841
  @type instance: L{objects.Instance}
7842
  @param instance: the instance whose disks we should remove
7843
  @type target_node: string
7844
  @param target_node: used to override the node on which to remove the disks
7845
  @rtype: boolean
7846
  @return: the success of the removal
7847

7848
  """
7849
  logging.info("Removing block devices for instance %s", instance.name)
7850

    
7851
  all_result = True
7852
  for device in instance.disks:
7853
    if target_node:
7854
      edata = [(target_node, device)]
7855
    else:
7856
      edata = device.ComputeNodeTree(instance.primary_node)
7857
    for node, disk in edata:
7858
      lu.cfg.SetDiskID(disk, node)
7859
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7860
      if msg:
7861
        lu.LogWarning("Could not remove block device %s on node %s,"
7862
                      " continuing anyway: %s", device.iv_name, node, msg)
7863
        all_result = False
7864

    
7865
  if instance.disk_template == constants.DT_FILE:
7866
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7867
    if target_node:
7868
      tgt = target_node
7869
    else:
7870
      tgt = instance.primary_node
7871
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7872
    if result.fail_msg:
7873
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7874
                    file_storage_dir, tgt, result.fail_msg)
7875
      all_result = False
7876

    
7877
  return all_result
7878

    
7879

    
7880
def _ComputeDiskSizePerVG(disk_template, disks):
7881
  """Compute disk size requirements in the volume group
7882

7883
  """
7884
  def _compute(disks, payload):
7885
    """Universal algorithm.
7886

7887
    """
7888
    vgs = {}
7889
    for disk in disks:
7890
      vgs[disk[constants.IDISK_VG]] = \
7891
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
7892

    
7893
    return vgs
7894

    
7895
  # Required free disk space as a function of disk template and disk sizes
7896
  req_size_dict = {
7897
    constants.DT_DISKLESS: {},
7898
    constants.DT_PLAIN: _compute(disks, 0),
7899
    # 128 MB are added for drbd metadata for each disk
7900
    constants.DT_DRBD8: _compute(disks, 128),
7901
    constants.DT_FILE: {},
7902
    constants.DT_SHARED_FILE: {},
7903
  }
7904

    
7905
  if disk_template not in req_size_dict:
7906
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7907
                                 " is unknown" %  disk_template)
7908

    
7909
  return req_size_dict[disk_template]
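  # Illustrative example: for DT_PLAIN with a 1024 MiB disk in VG "xenvg" and
  # a 2048 MiB disk in VG "data" (VG names are examples only), the result is
  # {"xenvg": 1024, "data": 2048}; for DT_DRBD8 each disk additionally
  # accounts 128 MB of DRBD metadata against its volume group.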
7910

    
7911

    
7912
def _ComputeDiskSize(disk_template, disks):
7913
  """Compute disk size requirements in the volume group
7914

7915
  """
7916
  # Required free disk space as a function of disk template and disk sizes
7917
  req_size_dict = {
7918
    constants.DT_DISKLESS: None,
7919
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7920
    # 128 MB are added for drbd metadata for each disk
7921
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7922
    constants.DT_FILE: None,
7923
    constants.DT_SHARED_FILE: 0,
7924
    constants.DT_BLOCK: 0,
7925
  }
7926

    
7927
  if disk_template not in req_size_dict:
7928
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7929
                                 " is unknown" %  disk_template)
7930

    
7931
  return req_size_dict[disk_template]
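  # Illustrative example: for two DT_DRBD8 disks of 1024 MiB and 2048 MiB this
  # returns (1024 + 128) + (2048 + 128) = 3328 MiB.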
7932

    
7933

    
7934
def _FilterVmNodes(lu, nodenames):
7935
  """Filters out non-vm_capable nodes from a list.
7936

7937
  @type lu: L{LogicalUnit}
7938
  @param lu: the logical unit for which we check
7939
  @type nodenames: list
7940
  @param nodenames: the list of nodes on which we should check
7941
  @rtype: list
7942
  @return: the list of vm-capable nodes
7943

7944
  """
7945
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7946
  return [name for name in nodenames if name not in non_vm_nodes]
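  # Example (node names are illustrative): if "node2" has vm_capable=False,
  # _FilterVmNodes(lu, ["node1", "node2", "node3"]) -> ["node1", "node3"].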
7947

    
7948

    
7949
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7950
  """Hypervisor parameter validation.
7951

7952
  This function abstract the hypervisor parameter validation to be
7953
  used in both instance create and instance modify.
7954

7955
  @type lu: L{LogicalUnit}
7956
  @param lu: the logical unit for which we check
7957
  @type nodenames: list
7958
  @param nodenames: the list of nodes on which we should check
7959
  @type hvname: string
7960
  @param hvname: the name of the hypervisor we should use
7961
  @type hvparams: dict
7962
  @param hvparams: the parameters which we need to check
7963
  @raise errors.OpPrereqError: if the parameters are not valid
7964

7965
  """
7966
  nodenames = _FilterVmNodes(lu, nodenames)
7967
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7968
                                                  hvname,
7969
                                                  hvparams)
7970
  for node in nodenames:
7971
    info = hvinfo[node]
7972
    if info.offline:
7973
      continue
7974
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7975

    
7976

    
7977
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7978
  """OS parameters validation.
7979

7980
  @type lu: L{LogicalUnit}
7981
  @param lu: the logical unit for which we check
7982
  @type required: boolean
7983
  @param required: whether the validation should fail if the OS is not
7984
      found
7985
  @type nodenames: list
7986
  @param nodenames: the list of nodes on which we should check
7987
  @type osname: string
7988
  @param osname: the name of the hypervisor we should use
7989
  @type osparams: dict
7990
  @param osparams: the parameters which we need to check
7991
  @raise errors.OpPrereqError: if the parameters are not valid
7992

7993
  """
7994
  nodenames = _FilterVmNodes(lu, nodenames)
7995
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7996
                                   [constants.OS_VALIDATE_PARAMETERS],
7997
                                   osparams)
7998
  for node, nres in result.items():
7999
    # we don't check for offline cases since this should be run only
8000
    # against the master node and/or an instance's nodes
8001
    nres.Raise("OS Parameters validation failed on node %s" % node)
8002
    if not nres.payload:
8003
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8004
                 osname, node)
8005

    
8006

    
8007
class LUInstanceCreate(LogicalUnit):
8008
  """Create an instance.
8009

8010
  """
8011
  HPATH = "instance-add"
8012
  HTYPE = constants.HTYPE_INSTANCE
8013
  REQ_BGL = False
8014

    
8015
  def CheckArguments(self):
8016
    """Check arguments.
8017

8018
    """
8019
    # do not require name_check to ease forward/backward compatibility
8020
    # for tools
8021
    if self.op.no_install and self.op.start:
8022
      self.LogInfo("No-installation mode selected, disabling startup")
8023
      self.op.start = False
8024
    # validate/normalize the instance name
8025
    self.op.instance_name = \
8026
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8027

    
8028
    if self.op.ip_check and not self.op.name_check:
8029
      # TODO: make the ip check more flexible and not depend on the name check
8030
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8031
                                 " check", errors.ECODE_INVAL)
8032

    
8033
    # check nics' parameter names
8034
    for nic in self.op.nics:
8035
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8036

    
8037
    # check disks. parameter names and consistent adopt/no-adopt strategy
8038
    has_adopt = has_no_adopt = False
8039
    for disk in self.op.disks:
8040
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8041
      if constants.IDISK_ADOPT in disk:
8042
        has_adopt = True
8043
      else:
8044
        has_no_adopt = True
8045
    if has_adopt and has_no_adopt:
8046
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8047
                                 errors.ECODE_INVAL)
8048
    if has_adopt:
8049
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8050
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8051
                                   " '%s' disk template" %
8052
                                   self.op.disk_template,
8053
                                   errors.ECODE_INVAL)
8054
      if self.op.iallocator is not None:
8055
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8056
                                   " iallocator script", errors.ECODE_INVAL)
8057
      if self.op.mode == constants.INSTANCE_IMPORT:
8058
        raise errors.OpPrereqError("Disk adoption not allowed for"
8059
                                   " instance import", errors.ECODE_INVAL)
8060
    else:
8061
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8062
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8063
                                   " but no 'adopt' parameter given" %
8064
                                   self.op.disk_template,
8065
                                   errors.ECODE_INVAL)
8066

    
8067
    self.adopt_disks = has_adopt
8068

    
8069
    # instance name verification
8070
    if self.op.name_check:
8071
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8072
      self.op.instance_name = self.hostname1.name
8073
      # used in CheckPrereq for ip ping check
8074
      self.check_ip = self.hostname1.ip
8075
    else:
8076
      self.check_ip = None
8077

    
8078
    # file storage checks
8079
    if (self.op.file_driver and
8080
        not self.op.file_driver in constants.FILE_DRIVER):
8081
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8082
                                 self.op.file_driver, errors.ECODE_INVAL)
8083

    
8084
    if self.op.disk_template == constants.DT_FILE:
8085
      opcodes.RequireFileStorage()
8086
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8087
      opcodes.RequireSharedFileStorage()
8088

    
8089
    ### Node/iallocator related checks
8090
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8091

    
8092
    if self.op.pnode is not None:
8093
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8094
        if self.op.snode is None:
8095
          raise errors.OpPrereqError("The networked disk templates need"
8096
                                     " a mirror node", errors.ECODE_INVAL)
8097
      elif self.op.snode:
8098
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8099
                        " template")
8100
        self.op.snode = None
8101

    
8102
    self._cds = _GetClusterDomainSecret()
8103

    
8104
    if self.op.mode == constants.INSTANCE_IMPORT:
8105
      # On import force_variant must be True, because if we forced it at
8106
      # initial install, our only chance when importing it back is that it
8107
      # works again!
8108
      self.op.force_variant = True
8109

    
8110
      if self.op.no_install:
8111
        self.LogInfo("No-installation mode has no effect during import")
8112

    
8113
    elif self.op.mode == constants.INSTANCE_CREATE:
8114
      if self.op.os_type is None:
8115
        raise errors.OpPrereqError("No guest OS specified",
8116
                                   errors.ECODE_INVAL)
8117
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8118
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8119
                                   " installation" % self.op.os_type,
8120
                                   errors.ECODE_STATE)
8121
      if self.op.disk_template is None:
8122
        raise errors.OpPrereqError("No disk template specified",
8123
                                   errors.ECODE_INVAL)
8124

    
8125
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8126
      # Check handshake to ensure both clusters have the same domain secret
8127
      src_handshake = self.op.source_handshake
8128
      if not src_handshake:
8129
        raise errors.OpPrereqError("Missing source handshake",
8130
                                   errors.ECODE_INVAL)
8131

    
8132
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8133
                                                           src_handshake)
8134
      if errmsg:
8135
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8136
                                   errors.ECODE_INVAL)
8137

    
8138
      # Load and check source CA
8139
      self.source_x509_ca_pem = self.op.source_x509_ca
8140
      if not self.source_x509_ca_pem:
8141
        raise errors.OpPrereqError("Missing source X509 CA",
8142
                                   errors.ECODE_INVAL)
8143

    
8144
      try:
8145
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8146
                                                    self._cds)
8147
      except OpenSSL.crypto.Error, err:
8148
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8149
                                   (err, ), errors.ECODE_INVAL)
8150

    
8151
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8152
      if errcode is not None:
8153
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8154
                                   errors.ECODE_INVAL)
8155

    
8156
      self.source_x509_ca = cert
8157

    
8158
      src_instance_name = self.op.source_instance_name
8159
      if not src_instance_name:
8160
        raise errors.OpPrereqError("Missing source instance name",
8161
                                   errors.ECODE_INVAL)
8162

    
8163
      self.source_instance_name = \
8164
          netutils.GetHostname(name=src_instance_name).name
8165

    
8166
    else:
8167
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8168
                                 self.op.mode, errors.ECODE_INVAL)
8169

    
8170
  def ExpandNames(self):
8171
    """ExpandNames for CreateInstance.
8172

8173
    Figure out the right locks for instance creation.
8174

8175
    """
8176
    self.needed_locks = {}
8177

    
8178
    instance_name = self.op.instance_name
8179
    # this is just a preventive check, but someone might still add this
8180
    # instance in the meantime, and creation will fail at lock-add time
8181
    if instance_name in self.cfg.GetInstanceList():
8182
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8183
                                 instance_name, errors.ECODE_EXISTS)
8184

    
8185
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8186

    
8187
    if self.op.iallocator:
8188
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8189
    else:
8190
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8191
      nodelist = [self.op.pnode]
8192
      if self.op.snode is not None:
8193
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8194
        nodelist.append(self.op.snode)
8195
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8196

    
8197
    # in case of import lock the source node too
8198
    if self.op.mode == constants.INSTANCE_IMPORT:
8199
      src_node = self.op.src_node
8200
      src_path = self.op.src_path
8201

    
8202
      if src_path is None:
8203
        self.op.src_path = src_path = self.op.instance_name
8204

    
8205
      if src_node is None:
8206
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8207
        self.op.src_node = None
8208
        if os.path.isabs(src_path):
8209
          raise errors.OpPrereqError("Importing an instance from an absolute"
8210
                                     " path requires a source node option",
8211
                                     errors.ECODE_INVAL)
8212
      else:
8213
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8214
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8215
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8216
        if not os.path.isabs(src_path):
8217
          self.op.src_path = src_path = \
8218
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8219

    
8220
  def _RunAllocator(self):
8221
    """Run the allocator based on input opcode.
8222

8223
    """
8224
    nics = [n.ToDict() for n in self.nics]
8225
    ial = IAllocator(self.cfg, self.rpc,
8226
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8227
                     name=self.op.instance_name,
8228
                     disk_template=self.op.disk_template,
8229
                     tags=self.op.tags,
8230
                     os=self.op.os_type,
8231
                     vcpus=self.be_full[constants.BE_VCPUS],
8232
                     memory=self.be_full[constants.BE_MEMORY],
8233
                     disks=self.disks,
8234
                     nics=nics,
8235
                     hypervisor=self.op.hypervisor,
8236
                     )
8237

    
8238
    ial.Run(self.op.iallocator)
8239

    
8240
    if not ial.success:
8241
      raise errors.OpPrereqError("Can't compute nodes using"
8242
                                 " iallocator '%s': %s" %
8243
                                 (self.op.iallocator, ial.info),
8244
                                 errors.ECODE_NORES)
8245
    if len(ial.result) != ial.required_nodes:
8246
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8247
                                 " of nodes (%s), required %s" %
8248
                                 (self.op.iallocator, len(ial.result),
8249
                                  ial.required_nodes), errors.ECODE_FAULT)
8250
    self.op.pnode = ial.result[0]
8251
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8252
                 self.op.instance_name, self.op.iallocator,
8253
                 utils.CommaJoin(ial.result))
8254
    if ial.required_nodes == 2:
8255
      self.op.snode = ial.result[1]
8256

    
8257
  def BuildHooksEnv(self):
8258
    """Build hooks env.
8259

8260
    This runs on master, primary and secondary nodes of the instance.
8261

8262
    """
8263
    env = {
8264
      "ADD_MODE": self.op.mode,
8265
      }
8266
    if self.op.mode == constants.INSTANCE_IMPORT:
8267
      env["SRC_NODE"] = self.op.src_node
8268
      env["SRC_PATH"] = self.op.src_path
8269
      env["SRC_IMAGES"] = self.src_images
8270

    
8271
    env.update(_BuildInstanceHookEnv(
8272
      name=self.op.instance_name,
8273
      primary_node=self.op.pnode,
8274
      secondary_nodes=self.secondaries,
8275
      status=self.op.start,
8276
      os_type=self.op.os_type,
8277
      memory=self.be_full[constants.BE_MEMORY],
8278
      vcpus=self.be_full[constants.BE_VCPUS],
8279
      nics=_NICListToTuple(self, self.nics),
8280
      disk_template=self.op.disk_template,
8281
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8282
             for d in self.disks],
8283
      bep=self.be_full,
8284
      hvp=self.hv_full,
8285
      hypervisor_name=self.op.hypervisor,
8286
      tags=self.op.tags,
8287
    ))
8288

    
8289
    return env
8290

    
8291
  def BuildHooksNodes(self):
8292
    """Build hooks nodes.
8293

8294
    """
8295
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8296
    return nl, nl
8297

    
8298
  def _ReadExportInfo(self):
8299
    """Reads the export information from disk.
8300

8301
    It will override the opcode source node and path with the actual
8302
    information, if these two were not specified before.
8303

8304
    @return: the export information
8305

8306
    """
8307
    assert self.op.mode == constants.INSTANCE_IMPORT
8308

    
8309
    src_node = self.op.src_node
8310
    src_path = self.op.src_path
8311

    
8312
    if src_node is None:
8313
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8314
      exp_list = self.rpc.call_export_list(locked_nodes)
8315
      found = False
8316
      for node in exp_list:
8317
        if exp_list[node].fail_msg:
8318
          continue
8319
        if src_path in exp_list[node].payload:
8320
          found = True
8321
          self.op.src_node = src_node = node
8322
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8323
                                                       src_path)
8324
          break
8325
      if not found:
8326
        raise errors.OpPrereqError("No export found for relative path %s" %
8327
                                    src_path, errors.ECODE_INVAL)
8328

    
8329
    _CheckNodeOnline(self, src_node)
8330
    result = self.rpc.call_export_info(src_node, src_path)
8331
    result.Raise("No export or invalid export found in dir %s" % src_path)
8332

    
8333
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8334
    if not export_info.has_section(constants.INISECT_EXP):
8335
      raise errors.ProgrammerError("Corrupted export config",
8336
                                   errors.ECODE_ENVIRON)
8337

    
8338
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8339
    if int(ei_version) != constants.EXPORT_VERSION:
8340
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8341
                                 (ei_version, constants.EXPORT_VERSION),
8342
                                 errors.ECODE_ENVIRON)
8343
    return export_info
8344

    
8345
  def _ReadExportParams(self, einfo):
8346
    """Use export parameters as defaults.
8347

8348
    If the opcode doesn't specify (i.e. override) some instance
8349
    parameters, try to take them from the export information, if it
8350
    declares them.
8351

8352
    """
8353
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8354

    
8355
    if self.op.disk_template is None:
8356
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8357
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8358
                                          "disk_template")
8359
      else:
8360
        raise errors.OpPrereqError("No disk template specified and the export"
8361
                                   " is missing the disk_template information",
8362
                                   errors.ECODE_INVAL)
8363

    
8364
    if not self.op.disks:
8365
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8366
        disks = []
8367
        # TODO: import the disk iv_name too
8368
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8369
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8370
          disks.append({constants.IDISK_SIZE: disk_sz})
8371
        self.op.disks = disks
8372
      else:
8373
        raise errors.OpPrereqError("No disk info specified and the export"
8374
                                   " is missing the disk information",
8375
                                   errors.ECODE_INVAL)
8376

    
8377
    if (not self.op.nics and
8378
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8379
      nics = []
8380
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8381
        ndict = {}
8382
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8383
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8384
          ndict[name] = v
8385
        nics.append(ndict)
8386
      self.op.nics = nics
8387

    
8388
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8389
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8390

    
8391
    if (self.op.hypervisor is None and
8392
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8393
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8394

    
8395
    if einfo.has_section(constants.INISECT_HYP):
8396
      # use the export parameters but do not override the ones
8397
      # specified by the user
8398
      for name, value in einfo.items(constants.INISECT_HYP):
8399
        if name not in self.op.hvparams:
8400
          self.op.hvparams[name] = value
8401

    
8402
    if einfo.has_section(constants.INISECT_BEP):
8403
      # use the parameters, without overriding
8404
      for name, value in einfo.items(constants.INISECT_BEP):
8405
        if name not in self.op.beparams:
8406
          self.op.beparams[name] = value
8407
    else:
8408
      # try to read the parameters in the old style, from the main section
8409
      for name in constants.BES_PARAMETERS:
8410
        if (name not in self.op.beparams and
8411
            einfo.has_option(constants.INISECT_INS, name)):
8412
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8413

    
8414
    if einfo.has_section(constants.INISECT_OSP):
8415
      # use the parameters, without overriding
8416
      for name, value in einfo.items(constants.INISECT_OSP):
8417
        if name not in self.op.osparams:
8418
          self.op.osparams[name] = value
8419

    
8420
  def _RevertToDefaults(self, cluster):
8421
    """Revert the instance parameters to the default values.
8422

8423
    """
8424
    # hvparams
8425
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8426
    for name in self.op.hvparams.keys():
8427
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8428
        del self.op.hvparams[name]
8429
    # beparams
8430
    be_defs = cluster.SimpleFillBE({})
8431
    for name in self.op.beparams.keys():
8432
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8433
        del self.op.beparams[name]
8434
    # nic params
8435
    nic_defs = cluster.SimpleFillNIC({})
8436
    for nic in self.op.nics:
8437
      for name in constants.NICS_PARAMETERS:
8438
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8439
          del nic[name]
8440
    # osparams
8441
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8442
    for name in self.op.osparams.keys():
8443
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8444
        del self.op.osparams[name]
8445

    
8446
  def _CalculateFileStorageDir(self):
8447
    """Calculate final instance file storage dir.
8448

8449
    """
8450
    # file storage dir calculation/check
8451
    self.instance_file_storage_dir = None
8452
    if self.op.disk_template in constants.DTS_FILEBASED:
8453
      # build the full file storage dir path
8454
      joinargs = []
8455

    
8456
      if self.op.disk_template == constants.DT_SHARED_FILE:
8457
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8458
      else:
8459
        get_fsd_fn = self.cfg.GetFileStorageDir
8460

    
8461
      cfg_storagedir = get_fsd_fn()
8462
      if not cfg_storagedir:
8463
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8464
      joinargs.append(cfg_storagedir)
8465

    
8466
      if self.op.file_storage_dir is not None:
8467
        joinargs.append(self.op.file_storage_dir)
8468

    
8469
      joinargs.append(self.op.instance_name)
8470

    
8471
      # pylint: disable-msg=W0142
8472
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8473

    
8474
  def CheckPrereq(self):
8475
    """Check prerequisites.
8476

8477
    """
8478
    self._CalculateFileStorageDir()
8479

    
8480
    if self.op.mode == constants.INSTANCE_IMPORT:
8481
      export_info = self._ReadExportInfo()
8482
      self._ReadExportParams(export_info)
8483

    
8484
    if (not self.cfg.GetVGName() and
8485
        self.op.disk_template not in constants.DTS_NOT_LVM):
8486
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8487
                                 " instances", errors.ECODE_STATE)
8488

    
8489
    if self.op.hypervisor is None:
8490
      self.op.hypervisor = self.cfg.GetHypervisorType()
8491

    
8492
    cluster = self.cfg.GetClusterInfo()
8493
    enabled_hvs = cluster.enabled_hypervisors
8494
    if self.op.hypervisor not in enabled_hvs:
8495
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8496
                                 " cluster (%s)" % (self.op.hypervisor,
8497
                                  ",".join(enabled_hvs)),
8498
                                 errors.ECODE_STATE)
8499

    
8500
    # Check tag validity
8501
    for tag in self.op.tags:
8502
      objects.TaggableObject.ValidateTag(tag)
8503

    
8504
    # check hypervisor parameter syntax (locally)
8505
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8506
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8507
                                      self.op.hvparams)
8508
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8509
    hv_type.CheckParameterSyntax(filled_hvp)
8510
    self.hv_full = filled_hvp
8511
    # check that we don't specify global parameters on an instance
8512
    _CheckGlobalHvParams(self.op.hvparams)
8513

    
8514
    # fill and remember the beparams dict
8515
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8516
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8517

    
8518
    # build os parameters
8519
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8520

    
8521
    # now that hvp/bep are in final format, let's reset to defaults,
8522
    # if told to do so
8523
    if self.op.identify_defaults:
8524
      self._RevertToDefaults(cluster)
8525

    
8526
    # NIC buildup
8527
    self.nics = []
8528
    for idx, nic in enumerate(self.op.nics):
8529
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8530
      nic_mode = nic_mode_req
8531
      if nic_mode is None:
8532
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8533

    
8534
      # in routed mode, for the first nic, the default ip is 'auto'
8535
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8536
        default_ip_mode = constants.VALUE_AUTO
8537
      else:
8538
        default_ip_mode = constants.VALUE_NONE
8539

    
8540
      # ip validity checks
8541
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8542
      if ip is None or ip.lower() == constants.VALUE_NONE:
8543
        nic_ip = None
8544
      elif ip.lower() == constants.VALUE_AUTO:
8545
        if not self.op.name_check:
8546
          raise errors.OpPrereqError("IP address set to auto but name checks"
8547
                                     " have been skipped",
8548
                                     errors.ECODE_INVAL)
8549
        nic_ip = self.hostname1.ip
8550
      else:
8551
        if not netutils.IPAddress.IsValid(ip):
8552
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8553
                                     errors.ECODE_INVAL)
8554
        nic_ip = ip
8555

    
8556
      # TODO: check the ip address for uniqueness
8557
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8558
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8559
                                   errors.ECODE_INVAL)
8560

    
8561
      # MAC address verification
8562
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8563
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8564
        mac = utils.NormalizeAndValidateMac(mac)
8565

    
8566
        try:
8567
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8568
        except errors.ReservationError:
8569
          raise errors.OpPrereqError("MAC address %s already in use"
8570
                                     " in cluster" % mac,
8571
                                     errors.ECODE_NOTUNIQUE)
8572

    
8573
      #  Build nic parameters
8574
      link = nic.get(constants.INIC_LINK, None)
8575
      nicparams = {}
8576
      if nic_mode_req:
8577
        nicparams[constants.NIC_MODE] = nic_mode_req
8578
      if link:
8579
        nicparams[constants.NIC_LINK] = link
8580

    
8581
      check_params = cluster.SimpleFillNIC(nicparams)
8582
      objects.NIC.CheckParameterSyntax(check_params)
8583
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8584

    
8585
    # disk checks/pre-build
8586
    default_vg = self.cfg.GetVGName()
8587
    self.disks = []
8588
    for disk in self.op.disks:
8589
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8590
      if mode not in constants.DISK_ACCESS_SET:
8591
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8592
                                   mode, errors.ECODE_INVAL)
8593
      size = disk.get(constants.IDISK_SIZE, None)
8594
      if size is None:
8595
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8596
      try:
8597
        size = int(size)
8598
      except (TypeError, ValueError):
8599
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8600
                                   errors.ECODE_INVAL)
8601

    
8602
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8603
      new_disk = {
8604
        constants.IDISK_SIZE: size,
8605
        constants.IDISK_MODE: mode,
8606
        constants.IDISK_VG: data_vg,
8607
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8608
        }
8609
      if constants.IDISK_ADOPT in disk:
8610
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8611
      self.disks.append(new_disk)
8612

    
8613
    if self.op.mode == constants.INSTANCE_IMPORT:
8614

    
8615
      # Check that the new instance doesn't have less disks than the export
8616
      instance_disks = len(self.disks)
8617
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8618
      if instance_disks < export_disks:
8619
        raise errors.OpPrereqError("Not enough disks to import."
8620
                                   " (instance: %d, export: %d)" %
8621
                                   (instance_disks, export_disks),
8622
                                   errors.ECODE_INVAL)
8623

    
8624
      disk_images = []
8625
      for idx in range(export_disks):
8626
        option = "disk%d_dump" % idx
8627
        if export_info.has_option(constants.INISECT_INS, option):
8628
          # FIXME: are the old OSes, disk sizes, etc. useful?
8629
          export_name = export_info.get(constants.INISECT_INS, option)
8630
          image = utils.PathJoin(self.op.src_path, export_name)
8631
          disk_images.append(image)
8632
        else:
8633
          disk_images.append(False)
8634

    
8635
      self.src_images = disk_images
8636

    
8637
      old_name = export_info.get(constants.INISECT_INS, "name")
8638
      try:
8639
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8640
      except (TypeError, ValueError), err:
8641
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8642
                                   " an integer: %s" % str(err),
8643
                                   errors.ECODE_STATE)
8644
      if self.op.instance_name == old_name:
8645
        for idx, nic in enumerate(self.nics):
8646
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8647
            nic_mac_ini = "nic%d_mac" % idx
8648
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8649

    
8650
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8651

    
8652
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8653
    if self.op.ip_check:
8654
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8655
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8656
                                   (self.check_ip, self.op.instance_name),
8657
                                   errors.ECODE_NOTUNIQUE)
8658

    
8659
    #### mac address generation
8660
    # By generating here the mac address both the allocator and the hooks get
8661
    # the real final mac address rather than the 'auto' or 'generate' value.
8662
    # There is a race condition between the generation and the instance object
8663
    # creation, which means that we know the mac is valid now, but we're not
8664
    # sure it will be when we actually add the instance. If things go bad
8665
    # adding the instance will abort because of a duplicate mac, and the
8666
    # creation job will fail.
8667
    for nic in self.nics:
8668
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8669
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8670

    
8671
    #### allocator run
8672

    
8673
    if self.op.iallocator is not None:
8674
      self._RunAllocator()
8675

    
8676
    #### node related checks
8677

    
8678
    # check primary node
8679
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8680
    assert self.pnode is not None, \
8681
      "Cannot retrieve locked node %s" % self.op.pnode
8682
    if pnode.offline:
8683
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8684
                                 pnode.name, errors.ECODE_STATE)
8685
    if pnode.drained:
8686
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8687
                                 pnode.name, errors.ECODE_STATE)
8688
    if not pnode.vm_capable:
8689
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8690
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8691

    
8692
    self.secondaries = []
8693

    
8694
    # mirror node verification
8695
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8696
      if self.op.snode == pnode.name:
8697
        raise errors.OpPrereqError("The secondary node cannot be the"
8698
                                   " primary node", errors.ECODE_INVAL)
8699
      _CheckNodeOnline(self, self.op.snode)
8700
      _CheckNodeNotDrained(self, self.op.snode)
8701
      _CheckNodeVmCapable(self, self.op.snode)
8702
      self.secondaries.append(self.op.snode)
8703

    
8704
    nodenames = [pnode.name] + self.secondaries
8705

    
8706
    if not self.adopt_disks:
8707
      # Check lv size requirements, if not adopting
8708
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8709
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8710

    
8711
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8712
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8713
                                disk[constants.IDISK_ADOPT])
8714
                     for disk in self.disks])
8715
      if len(all_lvs) != len(self.disks):
8716
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8717
                                   errors.ECODE_INVAL)
8718
      for lv_name in all_lvs:
8719
        try:
8720
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8721
          # to ReserveLV uses the same syntax
8722
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8723
        except errors.ReservationError:
8724
          raise errors.OpPrereqError("LV named %s used by another instance" %
8725
                                     lv_name, errors.ECODE_NOTUNIQUE)
8726

    
8727
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8728
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8729

    
8730
      node_lvs = self.rpc.call_lv_list([pnode.name],
8731
                                       vg_names.payload.keys())[pnode.name]
8732
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8733
      node_lvs = node_lvs.payload
8734

    
8735
      delta = all_lvs.difference(node_lvs.keys())
8736
      if delta:
8737
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8738
                                   utils.CommaJoin(delta),
8739
                                   errors.ECODE_INVAL)
8740
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8741
      if online_lvs:
8742
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8743
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8744
                                   errors.ECODE_STATE)
8745
      # update the size of disk based on what is found
8746
      for dsk in self.disks:
8747
        dsk[constants.IDISK_SIZE] = \
8748
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8749
                                        dsk[constants.IDISK_ADOPT])][0]))
8750

    
8751
    elif self.op.disk_template == constants.DT_BLOCK:
8752
      # Normalize and de-duplicate device paths
8753
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8754
                       for disk in self.disks])
8755
      if len(all_disks) != len(self.disks):
8756
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8757
                                   errors.ECODE_INVAL)
8758
      baddisks = [d for d in all_disks
8759
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8760
      if baddisks:
8761
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8762
                                   " cannot be adopted" %
8763
                                   (", ".join(baddisks),
8764
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8765
                                   errors.ECODE_INVAL)
8766

    
8767
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8768
                                            list(all_disks))[pnode.name]
8769
      node_disks.Raise("Cannot get block device information from node %s" %
8770
                       pnode.name)
8771
      node_disks = node_disks.payload
8772
      delta = all_disks.difference(node_disks.keys())
8773
      if delta:
8774
        raise errors.OpPrereqError("Missing block device(s): %s" %
8775
                                   utils.CommaJoin(delta),
8776
                                   errors.ECODE_INVAL)
8777
      for dsk in self.disks:
8778
        dsk[constants.IDISK_SIZE] = \
8779
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8780

    
8781
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8782

    
8783
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8784
    # check OS parameters (remotely)
8785
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8786

    
8787
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8788

    
8789
    # memory check on primary node
8790
    if self.op.start:
8791
      _CheckNodeFreeMemory(self, self.pnode.name,
8792
                           "creating instance %s" % self.op.instance_name,
8793
                           self.be_full[constants.BE_MEMORY],
8794
                           self.op.hypervisor)
8795

    
8796
    self.dry_run_result = list(nodenames)
8797

    
8798
  def Exec(self, feedback_fn):
8799
    """Create and add the instance to the cluster.
8800

8801
    """
8802
    instance = self.op.instance_name
8803
    pnode_name = self.pnode.name
8804

    
8805
    ht_kind = self.op.hypervisor
8806
    if ht_kind in constants.HTS_REQ_PORT:
8807
      network_port = self.cfg.AllocatePort()
8808
    else:
8809
      network_port = None
8810

    
8811
    disks = _GenerateDiskTemplate(self,
8812
                                  self.op.disk_template,
8813
                                  instance, pnode_name,
8814
                                  self.secondaries,
8815
                                  self.disks,
8816
                                  self.instance_file_storage_dir,
8817
                                  self.op.file_driver,
8818
                                  0,
8819
                                  feedback_fn)
8820

    
8821
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8822
                            primary_node=pnode_name,
8823
                            nics=self.nics, disks=disks,
8824
                            disk_template=self.op.disk_template,
8825
                            admin_up=False,
8826
                            network_port=network_port,
8827
                            beparams=self.op.beparams,
8828
                            hvparams=self.op.hvparams,
8829
                            hypervisor=self.op.hypervisor,
8830
                            osparams=self.op.osparams,
8831
                            )
8832

    
8833
    if self.op.tags:
8834
      for tag in self.op.tags:
8835
        iobj.AddTag(tag)
8836

    
8837
    if self.adopt_disks:
8838
      if self.op.disk_template == constants.DT_PLAIN:
8839
        # rename LVs to the newly-generated names; we need to construct
8840
        # 'fake' LV disks with the old data, plus the new unique_id
8841
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8842
        rename_to = []
8843
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8844
          rename_to.append(t_dsk.logical_id)
8845
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8846
          self.cfg.SetDiskID(t_dsk, pnode_name)
8847
        result = self.rpc.call_blockdev_rename(pnode_name,
8848
                                               zip(tmp_disks, rename_to))
8849
        result.Raise("Failed to rename adoped LVs")
8850
    else:
8851
      feedback_fn("* creating instance disks...")
8852
      try:
8853
        _CreateDisks(self, iobj)
8854
      except errors.OpExecError:
8855
        self.LogWarning("Device creation failed, reverting...")
8856
        try:
8857
          _RemoveDisks(self, iobj)
8858
        finally:
8859
          self.cfg.ReleaseDRBDMinors(instance)
8860
          raise
8861

    
8862
    feedback_fn("adding instance %s to cluster config" % instance)
8863

    
8864
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8865

    
8866
    # Declare that we don't want to remove the instance lock anymore, as we've
8867
    # added the instance to the config
8868
    del self.remove_locks[locking.LEVEL_INSTANCE]
8869

    
8870
    if self.op.mode == constants.INSTANCE_IMPORT:
8871
      # Release unused nodes
8872
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8873
    else:
8874
      # Release all nodes
8875
      _ReleaseLocks(self, locking.LEVEL_NODE)
8876

    
8877
    disk_abort = False
8878
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8879
      feedback_fn("* wiping instance disks...")
8880
      try:
8881
        _WipeDisks(self, iobj)
8882
      except errors.OpExecError, err:
8883
        logging.exception("Wiping disks failed")
8884
        self.LogWarning("Wiping instance disks failed (%s)", err)
8885
        disk_abort = True
8886

    
8887
    if disk_abort:
8888
      # Something is already wrong with the disks, don't do anything else
8889
      pass
8890
    elif self.op.wait_for_sync:
8891
      disk_abort = not _WaitForSync(self, iobj)
8892
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8893
      # make sure the disks are not degraded (still sync-ing is ok)
8894
      feedback_fn("* checking mirrors status")
8895
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8896
    else:
8897
      disk_abort = False
8898

    
8899
    if disk_abort:
8900
      _RemoveDisks(self, iobj)
8901
      self.cfg.RemoveInstance(iobj.name)
8902
      # Make sure the instance lock gets removed
8903
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8904
      raise errors.OpExecError("There are some degraded disks for"
8905
                               " this instance")
8906

    
8907
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8908
      if self.op.mode == constants.INSTANCE_CREATE:
8909
        if not self.op.no_install:
8910
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8911
                        not self.op.wait_for_sync)
8912
          if pause_sync:
8913
            feedback_fn("* pausing disk sync to install instance OS")
8914
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8915
                                                              iobj.disks, True)
8916
            for idx, success in enumerate(result.payload):
8917
              if not success:
8918
                logging.warn("pause-sync of instance %s for disk %d failed",
8919
                             instance, idx)
8920

    
8921
          feedback_fn("* running the instance OS create scripts...")
8922
          # FIXME: pass debug option from opcode to backend
8923
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8924
                                                 self.op.debug_level)
8925
          if pause_sync:
8926
            feedback_fn("* resuming disk sync")
8927
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8928
                                                              iobj.disks, False)
8929
            for idx, success in enumerate(result.payload):
8930
              if not success:
8931
                logging.warn("resume-sync of instance %s for disk %d failed",
8932
                             instance, idx)
8933

    
8934
          result.Raise("Could not add os for instance %s"
8935
                       " on node %s" % (instance, pnode_name))
8936

    
8937
      elif self.op.mode == constants.INSTANCE_IMPORT:
8938
        feedback_fn("* running the instance OS import scripts...")
8939

    
8940
        transfers = []
8941

    
8942
        for idx, image in enumerate(self.src_images):
8943
          if not image:
8944
            continue
8945

    
8946
          # FIXME: pass debug option from opcode to backend
8947
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8948
                                             constants.IEIO_FILE, (image, ),
8949
                                             constants.IEIO_SCRIPT,
8950
                                             (iobj.disks[idx], idx),
8951
                                             None)
8952
          transfers.append(dt)
8953

    
8954
        import_result = \
8955
          masterd.instance.TransferInstanceData(self, feedback_fn,
8956
                                                self.op.src_node, pnode_name,
8957
                                                self.pnode.secondary_ip,
8958
                                                iobj, transfers)
8959
        if not compat.all(import_result):
8960
          self.LogWarning("Some disks for instance %s on node %s were not"
8961
                          " imported successfully" % (instance, pnode_name))
8962

    
8963
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8964
        feedback_fn("* preparing remote import...")
8965
        # The source cluster will stop the instance before attempting to make a
8966
        # connection. In some cases stopping an instance can take a long time,
8967
        # hence the shutdown timeout is added to the connection timeout.
8968
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8969
                           self.op.source_shutdown_timeout)
8970
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8971

    
8972
        assert iobj.primary_node == self.pnode.name
8973
        disk_results = \
8974
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8975
                                        self.source_x509_ca,
8976
                                        self._cds, timeouts)
8977
        if not compat.all(disk_results):
8978
          # TODO: Should the instance still be started, even if some disks
8979
          # failed to import (valid for local imports, too)?
8980
          self.LogWarning("Some disks for instance %s on node %s were not"
8981
                          " imported successfully" % (instance, pnode_name))
8982

    
8983
        # Run rename script on newly imported instance
8984
        assert iobj.name == instance
8985
        feedback_fn("Running rename script for %s" % instance)
8986
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8987
                                                   self.source_instance_name,
8988
                                                   self.op.debug_level)
8989
        if result.fail_msg:
8990
          self.LogWarning("Failed to run rename script for %s on node"
8991
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8992

    
8993
      else:
8994
        # also checked in the prereq part
8995
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8996
                                     % self.op.mode)
8997

    
8998
    if self.op.start:
8999
      iobj.admin_up = True
9000
      self.cfg.Update(iobj, feedback_fn)
9001
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9002
      feedback_fn("* starting instance...")
9003
      result = self.rpc.call_instance_start(pnode_name, iobj,
9004
                                            None, None, False)
9005
      result.Raise("Could not start instance")
9006

    
9007
    return list(iobj.all_nodes)
9008

    
9009

    
9010
class LUInstanceConsole(NoHooksLU):
9011
  """Connect to an instance's console.
9012

9013
  This is somewhat special in that it returns the command line that
9014
  you need to run on the master node in order to connect to the
9015
  console.
9016

9017
  """
9018
  REQ_BGL = False
9019

    
9020
  def ExpandNames(self):
9021
    self._ExpandAndLockInstance()
9022

    
9023
  def CheckPrereq(self):
9024
    """Check prerequisites.
9025

9026
    This checks that the instance is in the cluster.
9027

9028
    """
9029
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9030
    assert self.instance is not None, \
9031
      "Cannot retrieve locked instance %s" % self.op.instance_name
9032
    _CheckNodeOnline(self, self.instance.primary_node)
9033

    
9034
  def Exec(self, feedback_fn):
9035
    """Connect to the console of an instance
9036

9037
    """
9038
    instance = self.instance
9039
    node = instance.primary_node
9040

    
9041
    node_insts = self.rpc.call_instance_list([node],
9042
                                             [instance.hypervisor])[node]
9043
    node_insts.Raise("Can't get node information from %s" % node)
9044

    
9045
    if instance.name not in node_insts.payload:
9046
      if instance.admin_up:
9047
        state = constants.INSTST_ERRORDOWN
9048
      else:
9049
        state = constants.INSTST_ADMINDOWN
9050
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9051
                               (instance.name, state))
9052

    
9053
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9054

    
9055
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9056

    
9057

    
9058
def _GetInstanceConsole(cluster, instance):
9059
  """Returns console information for an instance.
9060

9061
  @type cluster: L{objects.Cluster}
9062
  @type instance: L{objects.Instance}
9063
  @rtype: dict
9064

9065
  """
9066
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9067
  # beparams and hvparams are passed separately, to avoid editing the
9068
  # instance and then saving the defaults in the instance itself.
9069
  hvparams = cluster.FillHV(instance)
9070
  beparams = cluster.FillBE(instance)
9071
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9072

    
9073
  assert console.instance == instance.name
9074
  assert console.Validate()
9075

    
9076
  return console.ToDict()
9077

    
9078

    
9079
class LUInstanceReplaceDisks(LogicalUnit):
9080
  """Replace the disks of an instance.
9081

9082
  """
9083
  HPATH = "mirrors-replace"
9084
  HTYPE = constants.HTYPE_INSTANCE
9085
  REQ_BGL = False
9086

    
9087
  def CheckArguments(self):
9088
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9089
                                  self.op.iallocator)
9090

    
9091
  def ExpandNames(self):
9092
    self._ExpandAndLockInstance()
9093

    
9094
    assert locking.LEVEL_NODE not in self.needed_locks
9095
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9096

    
9097
    assert self.op.iallocator is None or self.op.remote_node is None, \
9098
      "Conflicting options"
9099

    
9100
    if self.op.remote_node is not None:
9101
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9102

    
9103
      # Warning: do not remove the locking of the new secondary here
9104
      # unless DRBD8.AddChildren is changed to work in parallel;
9105
      # currently it doesn't since parallel invocations of
9106
      # FindUnusedMinor will conflict
9107
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9108
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9109
    else:
9110
      self.needed_locks[locking.LEVEL_NODE] = []
9111
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9112

    
9113
      if self.op.iallocator is not None:
9114
        # iallocator will select a new node in the same group
9115
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9116

    
9117
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9118
                                   self.op.iallocator, self.op.remote_node,
9119
                                   self.op.disks, False, self.op.early_release)
9120

    
9121
    self.tasklets = [self.replacer]
9122

    
9123
  def DeclareLocks(self, level):
9124
    if level == locking.LEVEL_NODEGROUP:
9125
      assert self.op.remote_node is None
9126
      assert self.op.iallocator is not None
9127
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9128

    
9129
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9130
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9131
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9132

    
9133
    elif level == locking.LEVEL_NODE:
9134
      if self.op.iallocator is not None:
9135
        assert self.op.remote_node is None
9136
        assert not self.needed_locks[locking.LEVEL_NODE]
9137

    
9138
        # Lock member nodes of all locked groups
9139
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9140
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9141
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9142
      else:
9143
        self._LockInstancesNodes()
9144

    
9145
  def BuildHooksEnv(self):
9146
    """Build hooks env.
9147

9148
    This runs on the master, the primary and all the secondaries.
9149

9150
    """
9151
    instance = self.replacer.instance
9152
    env = {
9153
      "MODE": self.op.mode,
9154
      "NEW_SECONDARY": self.op.remote_node,
9155
      "OLD_SECONDARY": instance.secondary_nodes[0],
9156
      }
9157
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9158
    return env
9159

    
9160
  def BuildHooksNodes(self):
9161
    """Build hooks nodes.
9162

9163
    """
9164
    instance = self.replacer.instance
9165
    nl = [
9166
      self.cfg.GetMasterNode(),
9167
      instance.primary_node,
9168
      ]
9169
    if self.op.remote_node is not None:
9170
      nl.append(self.op.remote_node)
9171
    return nl, nl
9172

    
9173
  def CheckPrereq(self):
9174
    """Check prerequisites.
9175

9176
    """
9177
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9178
            self.op.iallocator is None)
9179

    
9180
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9181
    if owned_groups:
9182
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9183

    
9184
    return LogicalUnit.CheckPrereq(self)
9185

    
9186

    
9187
class TLReplaceDisks(Tasklet):
9188
  """Replaces disks for an instance.
9189

9190
  Note: Locking is not within the scope of this class.
9191

9192
  """
9193
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9194
               disks, delay_iallocator, early_release):
9195
    """Initializes this class.
9196

9197
    """
9198
    Tasklet.__init__(self, lu)
9199

    
9200
    # Parameters
9201
    self.instance_name = instance_name
9202
    self.mode = mode
9203
    self.iallocator_name = iallocator_name
9204
    self.remote_node = remote_node
9205
    self.disks = disks
9206
    self.delay_iallocator = delay_iallocator
9207
    self.early_release = early_release
9208

    
9209
    # Runtime data
9210
    self.instance = None
9211
    self.new_node = None
9212
    self.target_node = None
9213
    self.other_node = None
9214
    self.remote_node_info = None
9215
    self.node_secondary_ip = None
9216

    
9217
  @staticmethod
9218
  def CheckArguments(mode, remote_node, iallocator):
9219
    """Helper function for users of this class.
9220

9221
    """
9222
    # check for valid parameter combination
9223
    if mode == constants.REPLACE_DISK_CHG:
9224
      if remote_node is None and iallocator is None:
9225
        raise errors.OpPrereqError("When changing the secondary either an"
9226
                                   " iallocator script must be used or the"
9227
                                   " new node given", errors.ECODE_INVAL)
9228

    
9229
      if remote_node is not None and iallocator is not None:
9230
        raise errors.OpPrereqError("Give either the iallocator or the new"
9231
                                   " secondary, not both", errors.ECODE_INVAL)
9232

    
9233
    elif remote_node is not None or iallocator is not None:
9234
      # Not replacing the secondary
9235
      raise errors.OpPrereqError("The iallocator and new node options can"
9236
                                 " only be used when changing the"
9237
                                 " secondary node", errors.ECODE_INVAL)
9238

    
9239
  @staticmethod
9240
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9241
    """Compute a new secondary node using an IAllocator.
9242

9243
    """
9244
    ial = IAllocator(lu.cfg, lu.rpc,
9245
                     mode=constants.IALLOCATOR_MODE_RELOC,
9246
                     name=instance_name,
9247
                     relocate_from=list(relocate_from))
9248

    
9249
    ial.Run(iallocator_name)
9250

    
9251
    if not ial.success:
9252
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9253
                                 " %s" % (iallocator_name, ial.info),
9254
                                 errors.ECODE_NORES)
9255

    
9256
    if len(ial.result) != ial.required_nodes:
9257
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9258
                                 " of nodes (%s), required %s" %
9259
                                 (iallocator_name,
9260
                                  len(ial.result), ial.required_nodes),
9261
                                 errors.ECODE_FAULT)
9262

    
9263
    remote_node_name = ial.result[0]
9264

    
9265
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9266
               instance_name, remote_node_name)
9267

    
9268
    return remote_node_name
9269

    
9270
  def _FindFaultyDisks(self, node_name):
9271
    """Wrapper for L{_FindFaultyInstanceDisks}.
9272

9273
    """
9274
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9275
                                    node_name, True)
9276

    
9277
  def _CheckDisksActivated(self, instance):
9278
    """Checks if the instance disks are activated.
9279

9280
    @param instance: The instance to check disks
9281
    @return: True if they are activated, False otherwise
9282

9283
    """
9284
    nodes = instance.all_nodes
9285

    
9286
    for idx, dev in enumerate(instance.disks):
9287
      for node in nodes:
9288
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9289
        self.cfg.SetDiskID(dev, node)
9290

    
9291
        result = self.rpc.call_blockdev_find(node, dev)
9292

    
9293
        if result.offline:
9294
          continue
9295
        elif result.fail_msg or not result.payload:
9296
          return False
9297

    
9298
    return True
9299

    
9300
  def CheckPrereq(self):
9301
    """Check prerequisites.
9302

9303
    This checks that the instance is in the cluster.
9304

9305
    """
9306
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9307
    assert instance is not None, \
9308
      "Cannot retrieve locked instance %s" % self.instance_name
9309

    
9310
    if instance.disk_template != constants.DT_DRBD8:
9311
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9312
                                 " instances", errors.ECODE_INVAL)
9313

    
9314
    if len(instance.secondary_nodes) != 1:
9315
      raise errors.OpPrereqError("The instance has a strange layout,"
9316
                                 " expected one secondary but found %d" %
9317
                                 len(instance.secondary_nodes),
9318
                                 errors.ECODE_FAULT)
9319

    
9320
    if not self.delay_iallocator:
9321
      self._CheckPrereq2()
9322

    
9323
  def _CheckPrereq2(self):
9324
    """Check prerequisites, second part.
9325

9326
    This function should conceptually be part of CheckPrereq. It was split
9327
    out and is called from Exec instead because, during node evacuation, the
9328
    iallocator would otherwise only see an unmodified cluster model, not
9329
    taking planned changes into account.
9330

9331
    """
9332
    instance = self.instance
9333
    secondary_node = instance.secondary_nodes[0]
9334

    
9335
    if self.iallocator_name is None:
9336
      remote_node = self.remote_node
9337
    else:
9338
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9339
                                       instance.name, instance.secondary_nodes)
9340

    
9341
    if remote_node is None:
9342
      self.remote_node_info = None
9343
    else:
9344
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9345
             "Remote node '%s' is not locked" % remote_node
9346

    
9347
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9348
      assert self.remote_node_info is not None, \
9349
        "Cannot retrieve locked node %s" % remote_node
9350

    
9351
    if remote_node == self.instance.primary_node:
9352
      raise errors.OpPrereqError("The specified node is the primary node of"
9353
                                 " the instance", errors.ECODE_INVAL)
9354

    
9355
    if remote_node == secondary_node:
9356
      raise errors.OpPrereqError("The specified node is already the"
9357
                                 " secondary node of the instance",
9358
                                 errors.ECODE_INVAL)
9359

    
9360
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9361
                                    constants.REPLACE_DISK_CHG):
9362
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9363
                                 errors.ECODE_INVAL)
9364

    
9365
    if self.mode == constants.REPLACE_DISK_AUTO:
9366
      if not self._CheckDisksActivated(instance):
9367
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9368
                                   " first" % self.instance_name,
9369
                                   errors.ECODE_STATE)
9370
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9371
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9372

    
9373
      if faulty_primary and faulty_secondary:
9374
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9375
                                   " one node and can not be repaired"
9376
                                   " automatically" % self.instance_name,
9377
                                   errors.ECODE_STATE)
9378

    
9379
      if faulty_primary:
9380
        self.disks = faulty_primary
9381
        self.target_node = instance.primary_node
9382
        self.other_node = secondary_node
9383
        check_nodes = [self.target_node, self.other_node]
9384
      elif faulty_secondary:
9385
        self.disks = faulty_secondary
9386
        self.target_node = secondary_node
9387
        self.other_node = instance.primary_node
9388
        check_nodes = [self.target_node, self.other_node]
9389
      else:
9390
        self.disks = []
9391
        check_nodes = []
9392

    
9393
    else:
9394
      # Non-automatic modes
9395
      if self.mode == constants.REPLACE_DISK_PRI:
9396
        self.target_node = instance.primary_node
9397
        self.other_node = secondary_node
9398
        check_nodes = [self.target_node, self.other_node]
9399

    
9400
      elif self.mode == constants.REPLACE_DISK_SEC:
9401
        self.target_node = secondary_node
9402
        self.other_node = instance.primary_node
9403
        check_nodes = [self.target_node, self.other_node]
9404

    
9405
      elif self.mode == constants.REPLACE_DISK_CHG:
9406
        self.new_node = remote_node
9407
        self.other_node = instance.primary_node
9408
        self.target_node = secondary_node
9409
        check_nodes = [self.new_node, self.other_node]
9410

    
9411
        _CheckNodeNotDrained(self.lu, remote_node)
9412
        _CheckNodeVmCapable(self.lu, remote_node)
9413

    
9414
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9415
        assert old_node_info is not None
9416
        if old_node_info.offline and not self.early_release:
9417
          # doesn't make sense to delay the release
9418
          self.early_release = True
9419
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9420
                          " early-release mode", secondary_node)
9421

    
9422
      else:
9423
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9424
                                     self.mode)
9425

    
9426
      # If not specified all disks should be replaced
9427
      if not self.disks:
9428
        self.disks = range(len(self.instance.disks))
9429

    
9430
    for node in check_nodes:
9431
      _CheckNodeOnline(self.lu, node)
9432

    
9433
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9434
                                                          self.other_node,
9435
                                                          self.target_node]
9436
                              if node_name is not None)
9437

    
9438
    # Release unneeded node locks
9439
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9440

    
9441
    # Release any owned node group
9442
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9443
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9444

    
9445
    # Check whether disks are valid
9446
    for disk_idx in self.disks:
9447
      instance.FindDisk(disk_idx)
9448

    
9449
    # Get secondary node IP addresses
9450
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9451
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9452

    
9453
  def Exec(self, feedback_fn):
9454
    """Execute disk replacement.
9455

9456
    This dispatches the disk replacement to the appropriate handler.
9457

9458
    """
9459
    if self.delay_iallocator:
9460
      self._CheckPrereq2()
9461

    
9462
    if __debug__:
9463
      # Verify owned locks before starting operation
9464
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9465
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9466
          ("Incorrect node locks, owning %s, expected %s" %
9467
           (owned_nodes, self.node_secondary_ip.keys()))
9468

    
9469
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9470
      assert list(owned_instances) == [self.instance_name], \
9471
          "Instance '%s' not locked" % self.instance_name
9472

    
9473
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9474
          "Should not own any node group lock at this point"
9475

    
9476
    if not self.disks:
9477
      feedback_fn("No disks need replacement")
9478
      return
9479

    
9480
    feedback_fn("Replacing disk(s) %s for %s" %
9481
                (utils.CommaJoin(self.disks), self.instance.name))
9482

    
9483
    activate_disks = (not self.instance.admin_up)
9484

    
9485
    # Activate the instance disks if we're replacing them on a down instance
9486
    if activate_disks:
9487
      _StartInstanceDisks(self.lu, self.instance, True)
9488

    
9489
    try:
9490
      # Should we replace the secondary node?
9491
      if self.new_node is not None:
9492
        fn = self._ExecDrbd8Secondary
9493
      else:
9494
        fn = self._ExecDrbd8DiskOnly
9495

    
9496
      result = fn(feedback_fn)
9497
    finally:
9498
      # Deactivate the instance disks if we're replacing them on a
9499
      # down instance
9500
      if activate_disks:
9501
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9502

    
9503
    if __debug__:
9504
      # Verify owned locks
9505
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9506
      nodes = frozenset(self.node_secondary_ip)
9507
      assert ((self.early_release and not owned_nodes) or
9508
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9509
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9510
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9511

    
9512
    return result
9513

    
9514
  def _CheckVolumeGroup(self, nodes):
9515
    self.lu.LogInfo("Checking volume groups")
9516

    
9517
    vgname = self.cfg.GetVGName()
9518

    
9519
    # Make sure volume group exists on all involved nodes
9520
    results = self.rpc.call_vg_list(nodes)
9521
    if not results:
9522
      raise errors.OpExecError("Can't list volume groups on the nodes")
9523

    
9524
    for node in nodes:
9525
      res = results[node]
9526
      res.Raise("Error checking node %s" % node)
9527
      if vgname not in res.payload:
9528
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9529
                                 (vgname, node))
9530

    
9531
  def _CheckDisksExistence(self, nodes):
9532
    # Check disk existence
9533
    for idx, dev in enumerate(self.instance.disks):
9534
      if idx not in self.disks:
9535
        continue
9536

    
9537
      for node in nodes:
9538
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9539
        self.cfg.SetDiskID(dev, node)
9540

    
9541
        result = self.rpc.call_blockdev_find(node, dev)
9542

    
9543
        msg = result.fail_msg
9544
        if msg or not result.payload:
9545
          if not msg:
9546
            msg = "disk not found"
9547
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9548
                                   (idx, node, msg))
9549

    
9550
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9551
    for idx, dev in enumerate(self.instance.disks):
9552
      if idx not in self.disks:
9553
        continue
9554

    
9555
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9556
                      (idx, node_name))
9557

    
9558
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9559
                                   ldisk=ldisk):
9560
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9561
                                 " replace disks for instance %s" %
9562
                                 (node_name, self.instance.name))
9563

    
9564
  def _CreateNewStorage(self, node_name):
9565
    """Create new storage on the primary or secondary node.
9566

9567
    This is only used for same-node replaces, not for changing the
9568
    secondary node, hence we don't want to modify the existing disk.
9569

9570
    """
9571
    iv_names = {}
9572

    
9573
    for idx, dev in enumerate(self.instance.disks):
9574
      if idx not in self.disks:
9575
        continue
9576

    
9577
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9578

    
9579
      self.cfg.SetDiskID(dev, node_name)
9580

    
9581
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9582
      names = _GenerateUniqueNames(self.lu, lv_names)
9583

    
9584
      vg_data = dev.children[0].logical_id[0]
9585
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9586
                             logical_id=(vg_data, names[0]))
9587
      vg_meta = dev.children[1].logical_id[0]
9588
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9589
                             logical_id=(vg_meta, names[1]))
9590

    
9591
      new_lvs = [lv_data, lv_meta]
9592
      old_lvs = [child.Copy() for child in dev.children]
9593
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9594

    
9595
      # we pass force_create=True to force the LVM creation
9596
      for new_lv in new_lvs:
9597
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9598
                        _GetInstanceInfoText(self.instance), False)
9599

    
9600
    return iv_names
9601

    
9602
  def _CheckDevices(self, node_name, iv_names):
9603
    for name, (dev, _, _) in iv_names.iteritems():
9604
      self.cfg.SetDiskID(dev, node_name)
9605

    
9606
      result = self.rpc.call_blockdev_find(node_name, dev)
9607

    
9608
      msg = result.fail_msg
9609
      if msg or not result.payload:
9610
        if not msg:
9611
          msg = "disk not found"
9612
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9613
                                 (name, msg))
9614

    
9615
      if result.payload.is_degraded:
9616
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9617

    
9618
  def _RemoveOldStorage(self, node_name, iv_names):
9619
    for name, (_, old_lvs, _) in iv_names.iteritems():
9620
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9621

    
9622
      for lv in old_lvs:
9623
        self.cfg.SetDiskID(lv, node_name)
9624

    
9625
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9626
        if msg:
9627
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9628
                             hint="remove unused LVs manually")
9629

    
9630
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9631
    """Replace a disk on the primary or secondary for DRBD 8.
9632

9633
    The algorithm for replace is quite complicated:
9634

9635
      1. for each disk to be replaced:
9636

9637
        1. create new LVs on the target node with unique names
9638
        1. detach old LVs from the drbd device
9639
        1. rename old LVs to name_replaced.<time_t>
9640
        1. rename new LVs to old LVs
9641
        1. attach the new LVs (with the old names now) to the drbd device
9642

9643
      1. wait for sync across all devices
9644

9645
      1. for each modified disk:
9646

9647
        1. remove old LVs (which have the name name_replaces.<time_t>)
9648

9649
    Failures are not very well handled.
9650

9651
    """
9652
    steps_total = 6
9653

    
9654
    # Step: check device activation
9655
    self.lu.LogStep(1, steps_total, "Check device existence")
9656
    self._CheckDisksExistence([self.other_node, self.target_node])
9657
    self._CheckVolumeGroup([self.target_node, self.other_node])
9658

    
9659
    # Step: check other node consistency
9660
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9661
    self._CheckDisksConsistency(self.other_node,
9662
                                self.other_node == self.instance.primary_node,
9663
                                False)
9664

    
9665
    # Step: create new storage
9666
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9667
    iv_names = self._CreateNewStorage(self.target_node)
9668

    
9669
    # Step: for each lv, detach+rename*2+attach
9670
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9671
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9672
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9673

    
9674
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9675
                                                     old_lvs)
9676
      result.Raise("Can't detach drbd from local storage on node"
9677
                   " %s for device %s" % (self.target_node, dev.iv_name))
9678
      #dev.children = []
9679
      #cfg.Update(instance)
9680

    
9681
      # ok, we created the new LVs, so now we know we have the needed
9682
      # storage; as such, we proceed on the target node to rename
9683
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9684
      # using the assumption that logical_id == physical_id (which in
9685
      # turn is the unique_id on that node)
9686

    
9687
      # FIXME(iustin): use a better name for the replaced LVs
9688
      temp_suffix = int(time.time())
9689
      ren_fn = lambda d, suff: (d.physical_id[0],
9690
                                d.physical_id[1] + "_replaced-%s" % suff)
9691

    
9692
      # Build the rename list based on what LVs exist on the node
9693
      rename_old_to_new = []
9694
      for to_ren in old_lvs:
9695
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9696
        if not result.fail_msg and result.payload:
9697
          # device exists
9698
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9699

    
9700
      self.lu.LogInfo("Renaming the old LVs on the target node")
9701
      result = self.rpc.call_blockdev_rename(self.target_node,
9702
                                             rename_old_to_new)
9703
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9704

    
9705
      # Now we rename the new LVs to the old LVs
9706
      self.lu.LogInfo("Renaming the new LVs on the target node")
9707
      rename_new_to_old = [(new, old.physical_id)
9708
                           for old, new in zip(old_lvs, new_lvs)]
9709
      result = self.rpc.call_blockdev_rename(self.target_node,
9710
                                             rename_new_to_old)
9711
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9712

    
9713
      # Intermediate steps of in memory modifications
9714
      for old, new in zip(old_lvs, new_lvs):
9715
        new.logical_id = old.logical_id
9716
        self.cfg.SetDiskID(new, self.target_node)
9717

    
9718
      # We need to modify old_lvs so that removal later removes the
9719
      # right LVs, not the newly added ones; note that old_lvs is a
9720
      # copy here
9721
      for disk in old_lvs:
9722
        disk.logical_id = ren_fn(disk, temp_suffix)
9723
        self.cfg.SetDiskID(disk, self.target_node)
9724

    
9725
      # Now that the new lvs have the old name, we can add them to the device
9726
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9727
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9728
                                                  new_lvs)
9729
      msg = result.fail_msg
9730
      if msg:
9731
        for new_lv in new_lvs:
9732
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9733
                                               new_lv).fail_msg
9734
          if msg2:
9735
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9736
                               hint=("cleanup manually the unused logical"
9737
                                     "volumes"))
9738
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9739

    
9740
    cstep = 5
9741
    if self.early_release:
9742
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9743
      cstep += 1
9744
      self._RemoveOldStorage(self.target_node, iv_names)
9745
      # WARNING: we release both node locks here, do not do other RPCs
9746
      # than WaitForSync to the primary node
9747
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9748
                    names=[self.target_node, self.other_node])
9749

    
9750
    # Wait for sync
9751
    # This can fail as the old devices are degraded and _WaitForSync
9752
    # does a combined result over all disks, so we don't check its return value
9753
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9754
    cstep += 1
9755
    _WaitForSync(self.lu, self.instance)
9756

    
9757
    # Check all devices manually
9758
    self._CheckDevices(self.instance.primary_node, iv_names)
9759

    
9760
    # Step: remove old storage
9761
    if not self.early_release:
9762
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9763
      cstep += 1
9764
      self._RemoveOldStorage(self.target_node, iv_names)
9765

    
9766
  def _ExecDrbd8Secondary(self, feedback_fn):
9767
    """Replace the secondary node for DRBD 8.
9768

9769
    The algorithm for replace is quite complicated:
9770
      - for all disks of the instance:
9771
        - create new LVs on the new node with same names
9772
        - shutdown the drbd device on the old secondary
9773
        - disconnect the drbd network on the primary
9774
        - create the drbd device on the new secondary
9775
        - network attach the drbd on the primary, using an artifice:
9776
          the drbd code for Attach() will connect to the network if it
9777
          finds a device which is connected to the good local disks but
9778
          not network enabled
9779
      - wait for sync across all devices
9780
      - remove all disks from the old secondary
9781

9782
    Failures are not very well handled.
9783

9784
    """
9785
    steps_total = 6
9786

    
9787
    # Step: check device activation
9788
    self.lu.LogStep(1, steps_total, "Check device existence")
9789
    self._CheckDisksExistence([self.instance.primary_node])
9790
    self._CheckVolumeGroup([self.instance.primary_node])
9791

    
9792
    # Step: check other node consistency
9793
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9794
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9795

    
9796
    # Step: create new storage
9797
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9798
    for idx, dev in enumerate(self.instance.disks):
9799
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9800
                      (self.new_node, idx))
9801
      # we pass force_create=True to force LVM creation
9802
      for new_lv in dev.children:
9803
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9804
                        _GetInstanceInfoText(self.instance), False)
9805

    
9806
    # Step 4: dbrd minors and drbd setups changes
9807
    # after this, we must manually remove the drbd minors on both the
9808
    # error and the success paths
9809
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9810
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9811
                                         for dev in self.instance.disks],
9812
                                        self.instance.name)
9813
    logging.debug("Allocated minors %r", minors)
9814

    
9815
    iv_names = {}
9816
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9817
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9818
                      (self.new_node, idx))
9819
      # create new devices on new_node; note that we create two IDs:
9820
      # one without port, so the drbd will be activated without
9821
      # networking information on the new node at this stage, and one
9822
      # with network, for the latter activation in step 4
9823
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9824
      if self.instance.primary_node == o_node1:
9825
        p_minor = o_minor1
9826
      else:
9827
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9828
        p_minor = o_minor2
9829

    
9830
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9831
                      p_minor, new_minor, o_secret)
9832
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9833
                    p_minor, new_minor, o_secret)
9834

    
9835
      iv_names[idx] = (dev, dev.children, new_net_id)
9836
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9837
                    new_net_id)
9838
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9839
                              logical_id=new_alone_id,
9840
                              children=dev.children,
9841
                              size=dev.size)
9842
      try:
9843
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9844
                              _GetInstanceInfoText(self.instance), False)
9845
      except errors.GenericError:
9846
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9847
        raise
9848

    
9849
    # We have new devices, shutdown the drbd on the old secondary
9850
    for idx, dev in enumerate(self.instance.disks):
9851
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9852
      self.cfg.SetDiskID(dev, self.target_node)
9853
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9854
      if msg:
9855
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9856
                           "node: %s" % (idx, msg),
9857
                           hint=("Please cleanup this device manually as"
9858
                                 " soon as possible"))
9859

    
9860
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9861
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9862
                                               self.node_secondary_ip,
9863
                                               self.instance.disks)\
9864
                                              [self.instance.primary_node]
9865

    
9866
    msg = result.fail_msg
9867
    if msg:
9868
      # detaches didn't succeed (unlikely)
9869
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9870
      raise errors.OpExecError("Can't detach the disks from the network on"
9871
                               " old node: %s" % (msg,))
9872

    
9873
    # if we managed to detach at least one, we update all the disks of
9874
    # the instance to point to the new secondary
9875
    self.lu.LogInfo("Updating instance configuration")
9876
    for dev, _, new_logical_id in iv_names.itervalues():
9877
      dev.logical_id = new_logical_id
9878
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9879

    
9880
    self.cfg.Update(self.instance, feedback_fn)
9881

    
9882
    # and now perform the drbd attach
9883
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9884
                    " (standalone => connected)")
9885
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9886
                                            self.new_node],
9887
                                           self.node_secondary_ip,
9888
                                           self.instance.disks,
9889
                                           self.instance.name,
9890
                                           False)
9891
    for to_node, to_result in result.items():
9892
      msg = to_result.fail_msg
9893
      if msg:
9894
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9895
                           to_node, msg,
9896
                           hint=("please do a gnt-instance info to see the"
9897
                                 " status of disks"))
9898
    cstep = 5
9899
    if self.early_release:
9900
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9901
      cstep += 1
9902
      self._RemoveOldStorage(self.target_node, iv_names)
9903
      # WARNING: we release all node locks here, do not do other RPCs
9904
      # than WaitForSync to the primary node
9905
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9906
                    names=[self.instance.primary_node,
9907
                           self.target_node,
9908
                           self.new_node])
9909

    
9910
    # Wait for sync
9911
    # This can fail as the old devices are degraded and _WaitForSync
9912
    # does a combined result over all disks, so we don't check its return value
9913
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9914
    cstep += 1
9915
    _WaitForSync(self.lu, self.instance)
9916

    
9917
    # Check all devices manually
9918
    self._CheckDevices(self.instance.primary_node, iv_names)
9919

    
9920
    # Step: remove old storage
9921
    if not self.early_release:
9922
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9923
      self._RemoveOldStorage(self.target_node, iv_names)
9924

    
9925

    
9926
class LURepairNodeStorage(NoHooksLU):
9927
  """Repairs the volume group on a node.
9928

9929
  """
9930
  REQ_BGL = False
9931

    
9932
  def CheckArguments(self):
9933
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9934

    
9935
    storage_type = self.op.storage_type
9936

    
9937
    if (constants.SO_FIX_CONSISTENCY not in
9938
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9939
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9940
                                 " repaired" % storage_type,
9941
                                 errors.ECODE_INVAL)
9942

    
9943
  def ExpandNames(self):
9944
    self.needed_locks = {
9945
      locking.LEVEL_NODE: [self.op.node_name],
9946
      }
9947

    
9948
  def _CheckFaultyDisks(self, instance, node_name):
9949
    """Ensure faulty disks abort the opcode or at least warn."""
9950
    try:
9951
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9952
                                  node_name, True):
9953
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9954
                                   " node '%s'" % (instance.name, node_name),
9955
                                   errors.ECODE_STATE)
9956
    except errors.OpPrereqError, err:
9957
      if self.op.ignore_consistency:
9958
        self.proc.LogWarning(str(err.args[0]))
9959
      else:
9960
        raise
9961

    
9962
  def CheckPrereq(self):
9963
    """Check prerequisites.
9964

9965
    """
9966
    # Check whether any instance on this node has faulty disks
9967
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9968
      if not inst.admin_up:
9969
        continue
9970
      check_nodes = set(inst.all_nodes)
9971
      check_nodes.discard(self.op.node_name)
9972
      for inst_node_name in check_nodes:
9973
        self._CheckFaultyDisks(inst, inst_node_name)
9974

    
9975
  def Exec(self, feedback_fn):
9976
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9977
                (self.op.name, self.op.node_name))
9978

    
9979
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9980
    result = self.rpc.call_storage_execute(self.op.node_name,
9981
                                           self.op.storage_type, st_args,
9982
                                           self.op.name,
9983
                                           constants.SO_FIX_CONSISTENCY)
9984
    result.Raise("Failed to repair storage unit '%s' on %s" %
9985
                 (self.op.name, self.op.node_name))
9986

    
9987

    
9988
class LUNodeEvacuate(NoHooksLU):
9989
  """Evacuates instances off a list of nodes.
9990

9991
  """
9992
  REQ_BGL = False
9993

    
9994
  def CheckArguments(self):
9995
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9996

    
9997
  def ExpandNames(self):
9998
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9999

    
10000
    if self.op.remote_node is not None:
10001
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10002
      assert self.op.remote_node
10003

    
10004
      if self.op.remote_node == self.op.node_name:
10005
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10006
                                   " secondary node", errors.ECODE_INVAL)
10007

    
10008
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10009
        raise errors.OpPrereqError("Without the use of an iallocator only"
10010
                                   " secondary instances can be evacuated",
10011
                                   errors.ECODE_INVAL)
10012

    
10013
    # Declare locks
10014
    self.share_locks = _ShareAll()
10015
    self.needed_locks = {
10016
      locking.LEVEL_INSTANCE: [],
10017
      locking.LEVEL_NODEGROUP: [],
10018
      locking.LEVEL_NODE: [],
10019
      }
10020

    
10021
    if self.op.remote_node is None:
10022
      # Iallocator will choose any node(s) in the same group
10023
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10024
    else:
10025
      group_nodes = frozenset([self.op.remote_node])
10026

    
10027
    # Determine nodes to be locked
10028
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10029

    
10030
  def _DetermineInstances(self):
10031
    """Builds list of instances to operate on.
10032

10033
    """
10034
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10035

    
10036
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10037
      # Primary instances only
10038
      inst_fn = _GetNodePrimaryInstances
10039
      assert self.op.remote_node is None, \
10040
        "Evacuating primary instances requires iallocator"
10041
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10042
      # Secondary instances only
10043
      inst_fn = _GetNodeSecondaryInstances
10044
    else:
10045
      # All instances
10046
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10047
      inst_fn = _GetNodeInstances
10048

    
10049
    return inst_fn(self.cfg, self.op.node_name)
10050

    
10051
  def DeclareLocks(self, level):
10052
    if level == locking.LEVEL_INSTANCE:
10053
      # Lock instances optimistically, needs verification once node and group
10054
      # locks have been acquired
10055
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10056
        set(i.name for i in self._DetermineInstances())
10057

    
10058
    elif level == locking.LEVEL_NODEGROUP:
10059
      # Lock node groups optimistically, needs verification once nodes have
10060
      # been acquired
10061
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10062
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10063

    
10064
    elif level == locking.LEVEL_NODE:
10065
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10066

    
10067
  def CheckPrereq(self):
10068
    # Verify locks
10069
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10070
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10071
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10072

    
10073
    assert owned_nodes == self.lock_nodes
10074

    
10075
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10076
    if owned_groups != wanted_groups:
10077
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10078
                               " current groups are '%s', used to be '%s'" %
10079
                               (utils.CommaJoin(wanted_groups),
10080
                                utils.CommaJoin(owned_groups)))
10081

    
10082
    # Determine affected instances
10083
    self.instances = self._DetermineInstances()
10084
    self.instance_names = [i.name for i in self.instances]
10085

    
10086
    if set(self.instance_names) != owned_instances:
10087
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10088
                               " were acquired, current instances are '%s',"
10089
                               " used to be '%s'" %
10090
                               (self.op.node_name,
10091
                                utils.CommaJoin(self.instance_names),
10092
                                utils.CommaJoin(owned_instances)))
10093

    
10094
    if self.instance_names:
10095
      self.LogInfo("Evacuating instances from node '%s': %s",
10096
                   self.op.node_name,
10097
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10098
    else:
10099
      self.LogInfo("No instances to evacuate from node '%s'",
10100
                   self.op.node_name)
10101

    
10102
    if self.op.remote_node is not None:
10103
      for i in self.instances:
10104
        if i.primary_node == self.op.remote_node:
10105
          raise errors.OpPrereqError("Node %s is the primary node of"
10106
                                     " instance %s, cannot use it as"
10107
                                     " secondary" %
10108
                                     (self.op.remote_node, i.name),
10109
                                     errors.ECODE_INVAL)
10110

    
10111
  def Exec(self, feedback_fn):
10112
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10113

    
10114
    if not self.instance_names:
10115
      # No instances to evacuate
10116
      jobs = []
10117

    
10118
    elif self.op.iallocator is not None:
10119
      # TODO: Implement relocation to other group
10120
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10121
                       evac_mode=self.op.mode,
10122
                       instances=list(self.instance_names))
10123

    
10124
      ial.Run(self.op.iallocator)
10125

    
10126
      if not ial.success:
10127
        raise errors.OpPrereqError("Can't compute node evacuation using"
10128
                                   " iallocator '%s': %s" %
10129
                                   (self.op.iallocator, ial.info),
10130
                                   errors.ECODE_NORES)
10131

    
10132
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10133

    
10134
    elif self.op.remote_node is not None:
10135
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10136
      jobs = [
10137
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10138
                                        remote_node=self.op.remote_node,
10139
                                        disks=[],
10140
                                        mode=constants.REPLACE_DISK_CHG,
10141
                                        early_release=self.op.early_release)]
10142
        for instance_name in self.instance_names
10143
        ]
10144

    
10145
    else:
10146
      raise errors.ProgrammerError("No iallocator or remote node")
10147

    
10148
    return ResultWithJobs(jobs)
10149

    
10150

    
10151
def _SetOpEarlyRelease(early_release, op):
10152
  """Sets C{early_release} flag on opcodes if available.
10153

10154
  """
10155
  try:
10156
    op.early_release = early_release
10157
  except AttributeError:
10158
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10159

    
10160
  return op
10161

    
10162

    
10163
def _NodeEvacDest(use_nodes, group, nodes):
10164
  """Returns group or nodes depending on caller's choice.
10165

10166
  """
10167
  if use_nodes:
10168
    return utils.CommaJoin(nodes)
10169
  else:
10170
    return group
10171

    
10172

    
10173
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10174
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10175

10176
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10177
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10178

10179
  @type lu: L{LogicalUnit}
10180
  @param lu: Logical unit instance
10181
  @type alloc_result: tuple/list
10182
  @param alloc_result: Result from iallocator
10183
  @type early_release: bool
10184
  @param early_release: Whether to release locks early if possible
10185
  @type use_nodes: bool
10186
  @param use_nodes: Whether to display node names instead of groups
10187

10188
  """
10189
  (moved, failed, jobs) = alloc_result
10190

    
10191
  if failed:
10192
    lu.LogWarning("Unable to evacuate instances %s",
10193
                  utils.CommaJoin("%s (%s)" % (name, reason)
10194
                                  for (name, reason) in failed))
10195

    
10196
  if moved:
10197
    lu.LogInfo("Instances to be moved: %s",
10198
               utils.CommaJoin("%s (to %s)" %
10199
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10200
                               for (name, group, nodes) in moved))
10201

    
10202
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10203
              map(opcodes.OpCode.LoadOpCode, ops))
10204
          for ops in jobs]
10205

    
10206

    
10207
class LUInstanceGrowDisk(LogicalUnit):
10208
  """Grow a disk of an instance.
10209

10210
  """
10211
  HPATH = "disk-grow"
10212
  HTYPE = constants.HTYPE_INSTANCE
10213
  REQ_BGL = False
10214

    
10215
  def ExpandNames(self):
10216
    self._ExpandAndLockInstance()
10217
    self.needed_locks[locking.LEVEL_NODE] = []
10218
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10219

    
10220
  def DeclareLocks(self, level):
10221
    if level == locking.LEVEL_NODE:
10222
      self._LockInstancesNodes()
10223

    
10224
  def BuildHooksEnv(self):
10225
    """Build hooks env.
10226

10227
    This runs on the master, the primary and all the secondaries.
10228

10229
    """
10230
    env = {
10231
      "DISK": self.op.disk,
10232
      "AMOUNT": self.op.amount,
10233
      }
10234
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10235
    return env
10236

    
10237
  def BuildHooksNodes(self):
10238
    """Build hooks nodes.
10239

10240
    """
10241
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10242
    return (nl, nl)
10243

    
10244
  def CheckPrereq(self):
10245
    """Check prerequisites.
10246

10247
    This checks that the instance is in the cluster.
10248

10249
    """
10250
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10251
    assert instance is not None, \
10252
      "Cannot retrieve locked instance %s" % self.op.instance_name
10253
    nodenames = list(instance.all_nodes)
10254
    for node in nodenames:
10255
      _CheckNodeOnline(self, node)
10256

    
10257
    self.instance = instance
10258

    
10259
    if instance.disk_template not in constants.DTS_GROWABLE:
10260
      raise errors.OpPrereqError("Instance's disk layout does not support"
10261
                                 " growing", errors.ECODE_INVAL)
10262

    
10263
    self.disk = instance.FindDisk(self.op.disk)
10264

    
10265
    if instance.disk_template not in (constants.DT_FILE,
10266
                                      constants.DT_SHARED_FILE):
10267
      # TODO: check the free disk space for file, when that feature will be
10268
      # supported
10269
      _CheckNodesFreeDiskPerVG(self, nodenames,
10270
                               self.disk.ComputeGrowth(self.op.amount))
10271

    
10272
  def Exec(self, feedback_fn):
10273
    """Execute disk grow.
10274

10275
    """
10276
    instance = self.instance
10277
    disk = self.disk
10278

    
10279
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10280
    if not disks_ok:
10281
      raise errors.OpExecError("Cannot activate block device to grow")
10282

    
10283
    # First run all grow ops in dry-run mode
10284
    for node in instance.all_nodes:
10285
      self.cfg.SetDiskID(disk, node)
10286
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10287
      result.Raise("Grow request failed to node %s" % node)
10288

    
10289
    # We know that (as far as we can test) operations across different
10290
    # nodes will succeed, time to run it for real
10291
    for node in instance.all_nodes:
10292
      self.cfg.SetDiskID(disk, node)
10293
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10294
      result.Raise("Grow request failed to node %s" % node)
10295

    
10296
      # TODO: Rewrite code to work properly
10297
      # DRBD goes into sync mode for a short amount of time after executing the
10298
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10299
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10300
      # time is a work-around.
10301
      time.sleep(5)
10302

    
10303
    disk.RecordGrow(self.op.amount)
10304
    self.cfg.Update(instance, feedback_fn)
10305
    if self.op.wait_for_sync:
10306
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10307
      if disk_abort:
10308
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10309
                             " status; please check the instance")
10310
      if not instance.admin_up:
10311
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10312
    elif not instance.admin_up:
10313
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10314
                           " not supposed to be running because no wait for"
10315
                           " sync mode was requested")
10316

    
10317

    
10318
class LUInstanceQueryData(NoHooksLU):
10319
  """Query runtime instance data.
10320

10321
  """
10322
  REQ_BGL = False
10323

    
10324
  def ExpandNames(self):
10325
    self.needed_locks = {}
10326

    
10327
    # Use locking if requested or when non-static information is wanted
10328
    if not (self.op.static or self.op.use_locking):
10329
      self.LogWarning("Non-static data requested, locks need to be acquired")
10330
      self.op.use_locking = True
10331

    
10332
    if self.op.instances or not self.op.use_locking:
10333
      # Expand instance names right here
10334
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10335
    else:
10336
      # Will use acquired locks
10337
      self.wanted_names = None
10338

    
10339
    if self.op.use_locking:
10340
      self.share_locks = _ShareAll()
10341

    
10342
      if self.wanted_names is None:
10343
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10344
      else:
10345
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10346

    
10347
      self.needed_locks[locking.LEVEL_NODE] = []
10348
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10349

    
10350
  def DeclareLocks(self, level):
10351
    if self.op.use_locking and level == locking.LEVEL_NODE:
10352
      self._LockInstancesNodes()
10353

    
10354
  def CheckPrereq(self):
10355
    """Check prerequisites.
10356

10357
    This only checks the optional instance list against the existing names.
10358

10359
    """
10360
    if self.wanted_names is None:
10361
      assert self.op.use_locking, "Locking was not used"
10362
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10363

    
10364
    self.wanted_instances = \
10365
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10366

    
10367
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10368
    """Returns the status of a block device
10369

10370
    """
10371
    if self.op.static or not node:
10372
      return None
10373

    
10374
    self.cfg.SetDiskID(dev, node)
10375

    
10376
    result = self.rpc.call_blockdev_find(node, dev)
10377
    if result.offline:
10378
      return None
10379

    
10380
    result.Raise("Can't compute disk status for %s" % instance_name)
10381

    
10382
    status = result.payload
10383
    if status is None:
10384
      return None
10385

    
10386
    return (status.dev_path, status.major, status.minor,
10387
            status.sync_percent, status.estimated_time,
10388
            status.is_degraded, status.ldisk_status)
10389

    
10390
  def _ComputeDiskStatus(self, instance, snode, dev):
10391
    """Compute block device status.
10392

10393
    """
10394
    if dev.dev_type in constants.LDS_DRBD:
10395
      # we change the snode then (otherwise we use the one passed in)
10396
      if dev.logical_id[0] == instance.primary_node:
10397
        snode = dev.logical_id[1]
10398
      else:
10399
        snode = dev.logical_id[0]
10400

    
10401
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10402
                                              instance.name, dev)
10403
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10404

    
10405
    if dev.children:
10406
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10407
                                        instance, snode),
10408
                         dev.children)
10409
    else:
10410
      dev_children = []
10411

    
10412
    return {
10413
      "iv_name": dev.iv_name,
10414
      "dev_type": dev.dev_type,
10415
      "logical_id": dev.logical_id,
10416
      "physical_id": dev.physical_id,
10417
      "pstatus": dev_pstatus,
10418
      "sstatus": dev_sstatus,
10419
      "children": dev_children,
10420
      "mode": dev.mode,
10421
      "size": dev.size,
10422
      }
10423

    
10424
  def Exec(self, feedback_fn):
10425
    """Gather and return data"""
10426
    result = {}
10427

    
10428
    cluster = self.cfg.GetClusterInfo()
10429

    
10430
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10431
                                          for i in self.wanted_instances)
10432
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10433
      if self.op.static or pnode.offline:
10434
        remote_state = None
10435
        if pnode.offline:
10436
          self.LogWarning("Primary node %s is marked offline, returning static"
10437
                          " information only for instance %s" %
10438
                          (pnode.name, instance.name))
10439
      else:
10440
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10441
                                                  instance.name,
10442
                                                  instance.hypervisor)
10443
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10444
        remote_info = remote_info.payload
10445
        if remote_info and "state" in remote_info:
10446
          remote_state = "up"
10447
        else:
10448
          remote_state = "down"
10449

    
10450
      if instance.admin_up:
10451
        config_state = "up"
10452
      else:
10453
        config_state = "down"
10454

    
10455
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10456
                  instance.disks)
10457

    
10458
      result[instance.name] = {
10459
        "name": instance.name,
10460
        "config_state": config_state,
10461
        "run_state": remote_state,
10462
        "pnode": instance.primary_node,
10463
        "snodes": instance.secondary_nodes,
10464
        "os": instance.os,
10465
        # this happens to be the same format used for hooks
10466
        "nics": _NICListToTuple(self, instance.nics),
10467
        "disk_template": instance.disk_template,
10468
        "disks": disks,
10469
        "hypervisor": instance.hypervisor,
10470
        "network_port": instance.network_port,
10471
        "hv_instance": instance.hvparams,
10472
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10473
        "be_instance": instance.beparams,
10474
        "be_actual": cluster.FillBE(instance),
10475
        "os_instance": instance.osparams,
10476
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10477
        "serial_no": instance.serial_no,
10478
        "mtime": instance.mtime,
10479
        "ctime": instance.ctime,
10480
        "uuid": instance.uuid,
10481
        }
10482

    
10483
    return result
10484

    
10485

    
10486
class LUInstanceSetParams(LogicalUnit):
10487
  """Modifies an instances's parameters.
10488

10489
  """
10490
  HPATH = "instance-modify"
10491
  HTYPE = constants.HTYPE_INSTANCE
10492
  REQ_BGL = False
10493

    
10494
  def CheckArguments(self):
10495
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10496
            self.op.hvparams or self.op.beparams or self.op.os_name):
10497
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10498

    
10499
    if self.op.hvparams:
10500
      _CheckGlobalHvParams(self.op.hvparams)
10501

    
10502
    # Disk validation
10503
    disk_addremove = 0
10504
    for disk_op, disk_dict in self.op.disks:
10505
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10506
      if disk_op == constants.DDM_REMOVE:
10507
        disk_addremove += 1
10508
        continue
10509
      elif disk_op == constants.DDM_ADD:
10510
        disk_addremove += 1
10511
      else:
10512
        if not isinstance(disk_op, int):
10513
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10514
        if not isinstance(disk_dict, dict):
10515
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10516
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10517

    
10518
      if disk_op == constants.DDM_ADD:
10519
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10520
        if mode not in constants.DISK_ACCESS_SET:
10521
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10522
                                     errors.ECODE_INVAL)
10523
        size = disk_dict.get(constants.IDISK_SIZE, None)
10524
        if size is None:
10525
          raise errors.OpPrereqError("Required disk parameter size missing",
10526
                                     errors.ECODE_INVAL)
10527
        try:
10528
          size = int(size)
10529
        except (TypeError, ValueError), err:
10530
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10531
                                     str(err), errors.ECODE_INVAL)
10532
        disk_dict[constants.IDISK_SIZE] = size
10533
      else:
10534
        # modification of disk
10535
        if constants.IDISK_SIZE in disk_dict:
10536
          raise errors.OpPrereqError("Disk size change not possible, use"
10537
                                     " grow-disk", errors.ECODE_INVAL)
10538

    
10539
    if disk_addremove > 1:
10540
      raise errors.OpPrereqError("Only one disk add or remove operation"
10541
                                 " supported at a time", errors.ECODE_INVAL)
10542

    
10543
    if self.op.disks and self.op.disk_template is not None:
10544
      raise errors.OpPrereqError("Disk template conversion and other disk"
10545
                                 " changes not supported at the same time",
10546
                                 errors.ECODE_INVAL)
10547

    
10548
    if (self.op.disk_template and
10549
        self.op.disk_template in constants.DTS_INT_MIRROR and
10550
        self.op.remote_node is None):
10551
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10552
                                 " one requires specifying a secondary node",
10553
                                 errors.ECODE_INVAL)
10554

    
10555
    # NIC validation
10556
    nic_addremove = 0
10557
    for nic_op, nic_dict in self.op.nics:
10558
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10559
      if nic_op == constants.DDM_REMOVE:
10560
        nic_addremove += 1
10561
        continue
10562
      elif nic_op == constants.DDM_ADD:
10563
        nic_addremove += 1
10564
      else:
10565
        if not isinstance(nic_op, int):
10566
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10567
        if not isinstance(nic_dict, dict):
10568
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10569
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10570

    
10571
      # nic_dict should be a dict
10572
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10573
      if nic_ip is not None:
10574
        if nic_ip.lower() == constants.VALUE_NONE:
10575
          nic_dict[constants.INIC_IP] = None
10576
        else:
10577
          if not netutils.IPAddress.IsValid(nic_ip):
10578
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10579
                                       errors.ECODE_INVAL)
10580

    
10581
      nic_bridge = nic_dict.get("bridge", None)
10582
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10583
      if nic_bridge and nic_link:
10584
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10585
                                   " at the same time", errors.ECODE_INVAL)
10586
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10587
        nic_dict["bridge"] = None
10588
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10589
        nic_dict[constants.INIC_LINK] = None
10590

    
10591
      if nic_op == constants.DDM_ADD:
10592
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10593
        if nic_mac is None:
10594
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10595

    
10596
      if constants.INIC_MAC in nic_dict:
10597
        nic_mac = nic_dict[constants.INIC_MAC]
10598
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10599
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10600

    
10601
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10602
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10603
                                     " modifying an existing nic",
10604
                                     errors.ECODE_INVAL)
10605

    
10606
    if nic_addremove > 1:
10607
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10608
                                 " supported at a time", errors.ECODE_INVAL)
10609

    
10610
  def ExpandNames(self):
10611
    self._ExpandAndLockInstance()
10612
    self.needed_locks[locking.LEVEL_NODE] = []
10613
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10614

    
10615
  def DeclareLocks(self, level):
10616
    if level == locking.LEVEL_NODE:
10617
      self._LockInstancesNodes()
10618
      if self.op.disk_template and self.op.remote_node:
10619
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10620
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10621

    
10622
  def BuildHooksEnv(self):
10623
    """Build hooks env.
10624

10625
    This runs on the master, primary and secondaries.
10626

10627
    """
10628
    args = dict()
10629
    if constants.BE_MEMORY in self.be_new:
10630
      args["memory"] = self.be_new[constants.BE_MEMORY]
10631
    if constants.BE_VCPUS in self.be_new:
10632
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10633
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10634
    # information at all.
10635
    if self.op.nics:
10636
      args["nics"] = []
10637
      nic_override = dict(self.op.nics)
10638
      for idx, nic in enumerate(self.instance.nics):
10639
        if idx in nic_override:
10640
          this_nic_override = nic_override[idx]
10641
        else:
10642
          this_nic_override = {}
10643
        if constants.INIC_IP in this_nic_override:
10644
          ip = this_nic_override[constants.INIC_IP]
10645
        else:
10646
          ip = nic.ip
10647
        if constants.INIC_MAC in this_nic_override:
10648
          mac = this_nic_override[constants.INIC_MAC]
10649
        else:
10650
          mac = nic.mac
10651
        if idx in self.nic_pnew:
10652
          nicparams = self.nic_pnew[idx]
10653
        else:
10654
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10655
        mode = nicparams[constants.NIC_MODE]
10656
        link = nicparams[constants.NIC_LINK]
10657
        args["nics"].append((ip, mac, mode, link))
10658
      if constants.DDM_ADD in nic_override:
10659
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10660
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10661
        nicparams = self.nic_pnew[constants.DDM_ADD]
10662
        mode = nicparams[constants.NIC_MODE]
10663
        link = nicparams[constants.NIC_LINK]
10664
        args["nics"].append((ip, mac, mode, link))
10665
      elif constants.DDM_REMOVE in nic_override:
10666
        del args["nics"][-1]
10667

    
10668
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10669
    if self.op.disk_template:
10670
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10671

    
10672
    return env
10673

    
10674
  def BuildHooksNodes(self):
10675
    """Build hooks nodes.
10676

10677
    """
10678
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10679
    return (nl, nl)
10680

    
10681
  def CheckPrereq(self):
10682
    """Check prerequisites.
10683

10684
    This only checks the instance list against the existing names.
10685

10686
    """
10687
    # checking the new params on the primary/secondary nodes
10688

    
10689
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10690
    cluster = self.cluster = self.cfg.GetClusterInfo()
10691
    assert self.instance is not None, \
10692
      "Cannot retrieve locked instance %s" % self.op.instance_name
10693
    pnode = instance.primary_node
10694
    nodelist = list(instance.all_nodes)
10695

    
10696
    # OS change
10697
    if self.op.os_name and not self.op.force:
10698
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10699
                      self.op.force_variant)
10700
      instance_os = self.op.os_name
10701
    else:
10702
      instance_os = instance.os
10703

    
10704
    if self.op.disk_template:
10705
      if instance.disk_template == self.op.disk_template:
10706
        raise errors.OpPrereqError("Instance already has disk template %s" %
10707
                                   instance.disk_template, errors.ECODE_INVAL)
10708

    
10709
      if (instance.disk_template,
10710
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10711
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10712
                                   " %s to %s" % (instance.disk_template,
10713
                                                  self.op.disk_template),
10714
                                   errors.ECODE_INVAL)
10715
      _CheckInstanceDown(self, instance, "cannot change disk template")
10716
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10717
        if self.op.remote_node == pnode:
10718
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10719
                                     " as the primary node of the instance" %
10720
                                     self.op.remote_node, errors.ECODE_STATE)
10721
        _CheckNodeOnline(self, self.op.remote_node)
10722
        _CheckNodeNotDrained(self, self.op.remote_node)
10723
        # FIXME: here we assume that the old instance type is DT_PLAIN
10724
        assert instance.disk_template == constants.DT_PLAIN
10725
        disks = [{constants.IDISK_SIZE: d.size,
10726
                  constants.IDISK_VG: d.logical_id[0]}
10727
                 for d in instance.disks]
10728
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10729
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10730

    
10731
    # hvparams processing
10732
    if self.op.hvparams:
10733
      hv_type = instance.hypervisor
10734
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10735
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10736
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10737

    
10738
      # local check
10739
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10740
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10741
      self.hv_new = hv_new # the new actual values
10742
      self.hv_inst = i_hvdict # the new dict (without defaults)
10743
    else:
10744
      self.hv_new = self.hv_inst = {}
10745

    
10746
    # beparams processing
10747
    if self.op.beparams:
10748
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10749
                                   use_none=True)
10750
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10751
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
    be_old = cluster.FillBE(instance)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload["memory_free"])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

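  # Illustrative note (not part of the original code): the free-memory check
  # above is plain arithmetic.  Assuming a hypothetical instance whose memory
  # is being raised to 4096 MB while the hypervisor reports it currently using
  # 1024 MB and the primary node reports 2048 MB free:
  #
  #   miss_mem = 4096 - 1024 - 2048 = 1024
  #
  # miss_mem > 0, so CheckPrereq would refuse the change with ECODE_NORES
  # unless memory is freed on the primary node first (or --force is used).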
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


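# Note (added for illustration, not in the original module): Exec() above
# returns a list of (parameter, new value) pairs that the CLI prints back to
# the user.  For a hypothetical modification that adds a disk, raises the
# memory and converts the disk template, the result could look roughly like:
#
#   [("disk/1", "add:size=10240,mode=rw"),
#    ("be/memory", 4096),
#    ("disk_template", "drbd")]
#
# The exact entries depend on which of self.op.disks, self.op.nics, hvparams,
# beparams, os_name and osparams were given in the opcode.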
class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


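# Example (illustrative only, with hypothetical names): with two nodes, one
# reachable and one failing the RPC, LUBackupQuery.Exec would return a dict
# along the lines of
#
#   {"node1.example.com": ["instance1.example.com"],
#    "node2.example.com": False}
#
# i.e. a list of export names per reachable node, and False for nodes whose
# export list could not be fetched.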
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


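# Sketch of the value returned by LUBackupPrepare.Exec for a remote export
# (added for illustration; the shapes follow the code above, the contents are
# placeholders):
#
#   {"handshake": <result of ComputeRemoteExportHandshake(cluster secret)>,
#    "x509_key_name": (name, hmac_of_name, salt),
#    "x509_ca": <signed PEM of the freshly created certificate>}
#
# For a local export nothing has to be prepared, so the opcode returns None.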
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance,
                                                None, None, False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


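# Note (illustration only): LUBackupExport.Exec returns (fin_resu, dresults),
# where fin_resu is a boolean for the export finalization step and dresults
# contains one boolean per instance disk.  A successful two-disk export would
# therefore return something like (True, [True, True]); any False value makes
# the OpExecError above fire before the cleanup/removal phase is reached.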
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


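# Worked example for CheckAssignmentForSplitInstances (illustrative, with
# hypothetical names): assume a DRBD instance "inst1" with primary "nodeA"
# and secondary "nodeB", both currently in group "G1".  Moving only nodeA to
# group "G2", i.e. changes=[("nodeA", "G2")], leaves inst1 spanning G2 and G1,
# so it appears in the first returned list (newly split).  Had inst1 already
# been spanning two groups before the change and were it still split after it,
# it would instead be reported in the second list (previously split, not
# fixed).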
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


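# LUGroupEvacuate above does not move instances itself: it asks the
# configured iallocator for a CHG_GROUP plan, converts that plan into
# per-instance jobs via _LoadNodeEvacResult() and returns them wrapped in
# ResultWithJobs, so the master daemon submits and tracks every
# failover/migration/replace-disks job separately.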
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the list of (path, tag) pairs matching the pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


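# The tag LUs above all resolve their target object from (self.op.kind,
# self.op.name) in TagsLU: TAG_CLUSTER acts on the cluster configuration,
# TAG_NODE and TAG_INSTANCE on the named node or instance, and TAG_NODEGROUP
# on the group whose name/UUID was looked up in ExpandNames.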
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


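# Note on the repeat semantics of LUTestDelay.Exec above: repeat == 0 still
# performs a single delay, while repeat == N (N > 0) performs N delays and
# logs the iterations as 0/N-1 .. N-1/N-1.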
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


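# Handshake used by LUTestJqueue above: a temporary Unix socket is created
# and its path is sent to the test client through a job-queue log entry
# (ELOG_JQUEUE_TEST).  The client must connect within _CLIENT_CONNECT_TIMEOUT
# seconds and then send a byte (or close the connection) within
# _CLIENT_CONFIRM_TIMEOUT seconds, otherwise the corresponding prereq/exec
# phase fails with OpPrereqError/OpExecError.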
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node name to a dict of static (config-based)
        node attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

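  # Rough shape of the serialized input built by _BuildInputData above (a
  # sketch derived from _ComputeClusterData and the _Add* methods; the exact
  # request keys depend on the mode):
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": ...,
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodegroups": {uuid: {"name": ..., "alloc_policy": ...}},
  #     "nodes": {name: {config data plus total/free memory, disk and cpus}},
  #     "instances": {name: {"memory": ..., "vcpus": ..., "disks": [...], ...}},
  #     "request": {"type": <mode>, <mode-specific keys>},
  #   }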
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

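  # _MODE_DATA maps each iallocator mode to a triple of (request-building
  # method, [(request key, value check)], result check): __init__ uses the
  # key list to validate the keyword arguments it accepts, _BuildInputData
  # re-checks the generated request against the same list, and
  # _ValidateResult applies the third element to the script's "result" field.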
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      request_groups = fn(self.relocate_from)
      result_groups = fn(rdict["result"])

      if self.success and result_groups != request_groups:
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
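# For illustration, _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class, while an unknown resource name raises OpPrereqError with
# ECODE_INVAL; the assert above guarantees that the mapping covers exactly
# the resources queryable via opcodes (constants.QR_VIA_OP).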