#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

# States of instance
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
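
# Note (editor's illustration, not part of the original module): an LU that
# needs follow-up jobs can return its result wrapped in ResultWithJobs; mcpu
# then submits each inner list as a separate job. A hedged sketch, with
# hypothetical variable names:
#
#   def Exec(self, feedback_fn):
#     ...                                   # the LU's own work
#     jobs = [[opcodes.OpInstanceStartup(instance_name=name)]
#             for name in affected_instances]
#     return ResultWithJobs(jobs, warnings=collected_warnings)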


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # "could be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instances' nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
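
# Note (editor's illustration, not part of the original module): the usual
# ExpandNames/DeclareLocks pattern for an LU that locks one instance and, at
# node level, that instance's nodes via the helpers above. Sketch only:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()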


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
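
# Note (editor's illustration, not part of the original module): an LU
# delegates its work to tasklets by filling self.tasklets in ExpandNames;
# LogicalUnit.CheckPrereq and Exec above then iterate over them. Sketch with
# hypothetical names:
#
#   def ExpandNames(self):
#     ...
#     self.tasklets = [SomeTasklet(self, item) for item in work_items]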


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
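
# Note (editor's illustration, not part of the original module): the helper
# above flattens a (bootid, (vg_info,), (hv_info,)) triple into one dict; the
# dict keys below are invented purely for illustration:
#
#   data = ("boot-id-123", ({"vg_size": 102400, "vg_free": 51200},),
#           ({"memory_total": 4096, "memory_free": 1024},))
#   info = _MakeLegacyNodeInfo(data)
#   # info == {"vg_size": 102400, "vg_free": 51200,
#   #          "memory_total": 4096, "memory_free": 1024,
#   #          "bootid": "boot-id-123"}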


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
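
# Note (editor's illustration, not part of the original module): merge
# semantics of _GetUpdatedParams; constants.VALUE_DEFAULT (and, with
# use_none=True, None) removes a key. The parameter names below are examples
# only:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # -> {"root_path": "/dev/vda1", "serial_console": True}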


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict
  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
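
# Note (editor's illustration, not part of the original module): once an LU
# has narrowed down the nodes it actually needs, it typically drops the rest
# of its node locks with the helper above, e.g.:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[instance.primary_node] +
#                      list(instance.secondary_nodes))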


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
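
# Note (editor's illustration, not part of the original module): the
# INSTANCE_* state lists defined at the top of this module are meant to be
# passed as req_states, e.g. to require a stopped instance:
#
#   _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
#                       msg="instance must be stopped")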


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
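
# Note (editor's illustration, not part of the original module): for an
# instance with one bridged NIC and one disk the function above ends up
# exporting (before the hooks runner adds its "GANETI_" prefix) keys such as
# INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC,
# INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE,
# INSTANCE_TAGS, plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend and
# hypervisor parameter.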


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator
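
# Note (editor's illustration, not part of the original module): the
# iallocator helpers above are typically wired up from an LU's CheckArguments,
# resolving the "iallocator or explicit node" choice for that LU's own opcode
# slots (the slot names below are hypothetical):
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "pnode")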


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    return master_params.name
1476

    
1477

    
1478
def _VerifyCertificate(filename):
1479
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1480

1481
  @type filename: string
1482
  @param filename: Path to PEM file
1483

1484
  """
1485
  try:
1486
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1487
                                           utils.ReadFile(filename))
1488
  except Exception, err: # pylint: disable=W0703
1489
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1490
            "Failed to load X509 certificate %s: %s" % (filename, err))
1491

    
1492
  (errcode, msg) = \
1493
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1494
                                constants.SSL_CERT_EXPIRATION_ERROR)
1495

    
1496
  if msg:
1497
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1498
  else:
1499
    fnamemsg = None
1500

    
1501
  if errcode is None:
1502
    return (None, fnamemsg)
1503
  elif errcode == utils.CERT_WARNING:
1504
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1505
  elif errcode == utils.CERT_ERROR:
1506
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1507

    
1508
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
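
# Shape of the list built above, with purely illustrative names and values:
#   [("cluster", "kvm", {...cluster-level defaults...}),
#    ("os debian-image", "kvm", {...defaults overridden for that OS...}),
#    ("instance inst1.example.com", "kvm", {...fully filled parameters...})]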


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # only mark the operation as failed for ERROR-level results, not warnings
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
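
# Rough shape of the result above when no group name is given (group names
# are illustrative): one job carrying OpClusterVerifyConfig, plus one job per
# node group carrying OpClusterVerifyGroup(group_name="group1"), each with a
# negative, relative job id from depends_fn() so it only runs after the
# global configuration check.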


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
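
  # The window accepted above is [nvinfo_starttime - NODE_MAX_CLOCK_SKEW,
  # nvinfo_endtime + NODE_MAX_CLOCK_SKEW]. Illustrative numbers only: with a
  # skew allowance of 150s, a start of 1000.0 and an end of 1002.0, a node
  # reporting 1200.0 would be reported as diverging by at least 198.0s.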

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      #TODO(dynmem): use MINMEM for checking
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MAXMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
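
  # Worked example for the check above (all numbers hypothetical): if this
  # node is secondary for two auto-balanced instances whose primary is node
  # "nodeB", with BE_MAXMEM of 2048 and 3072 MiB, then needed_mem is 5120 MiB;
  # an mfree of 4096 MiB would trigger CV_ENODEN1 for a failure of "nodeB".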

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
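
  # The (files_all, files_opt, files_mc, files_vm) tuple above is built by
  # _ComputeAncillaryFiles: files expected on every node, optional files,
  # files restricted to master candidates and files restricted to vm_capable
  # nodes. A mismatch would be reported roughly as (hypothetical names):
  #   "File /path/to/config found with 2 different checksums
  #    (variant 1 on node1, node2; variant 2 on node3)"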

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
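
  # Shape of the mapping returned above (names hypothetical); each inner list
  # holds one (success, payload) entry per disk of that instance:
  #   {"inst1.example.com": {"node1.example.com": [(True, st0), (True, st1)],
  #                          "node2.example.com": [(False, "node offline"),
  #                                                (False, "node offline")]}}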

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
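
  # Illustrative result of the selection above (all names hypothetical): for a
  # verified group with online nodes n1 and n2 and a single other group
  # containing n3 and n4, this returns roughly
  #   (["n1", "n2"], {"n1": ["n3"], "n2": ["n4"]})
  # i.e. the group's online nodes, each paired with one rotating contact node
  # from every other group.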

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }
2809

    
2810
    if vg_name is not None:
2811
      node_verify_param[constants.NV_VGLIST] = None
2812
      node_verify_param[constants.NV_LVLIST] = vg_name
2813
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2814
      node_verify_param[constants.NV_DRBDLIST] = None
2815

    
2816
    if drbd_helper:
2817
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2818

    
2819
    # bridge checks
2820
    # FIXME: this needs to be changed per node-group, not cluster-wide
2821
    bridges = set()
2822
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2823
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2824
      bridges.add(default_nicpp[constants.NIC_LINK])
2825
    for instance in self.my_inst_info.values():
2826
      for nic in instance.nics:
2827
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2828
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2829
          bridges.add(full_nic[constants.NIC_LINK])
2830

    
2831
    if bridges:
2832
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2833

    
2834
    # Build our expected cluster state
2835
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2836
                                                 name=node.name,
2837
                                                 vm_capable=node.vm_capable))
2838
                      for node in node_data_list)
2839

    
2840
    # Gather OOB paths
2841
    oob_paths = []
2842
    for node in self.all_node_info.values():
2843
      path = _SupportsOob(self.cfg, node)
2844
      if path and path not in oob_paths:
2845
        oob_paths.append(path)
2846

    
2847
    if oob_paths:
2848
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2849

    
2850
    for instance in self.my_inst_names:
2851
      inst_config = self.my_inst_info[instance]
2852

    
2853
      for nname in inst_config.all_nodes:
2854
        if nname not in node_image:
2855
          gnode = self.NodeImage(name=nname)
2856
          gnode.ghost = (nname not in self.all_node_info)
2857
          node_image[nname] = gnode
2858

    
2859
      inst_config.MapLVsByNode(node_vol_should)
2860

    
2861
      pnode = inst_config.primary_node
2862
      node_image[pnode].pinst.append(instance)
2863

    
2864
      for snode in inst_config.secondary_nodes:
2865
        nimg = node_image[snode]
2866
        nimg.sinst.append(instance)
2867
        if pnode not in nimg.sbp:
2868
          nimg.sbp[pnode] = []
2869
        nimg.sbp[pnode].append(instance)
2870

    
2871
    # At this point, we have the in-memory data structures complete,
2872
    # except for the runtime information, which we'll gather next
2873

    
2874
    # Due to the way our RPC system works, exact response times cannot be
2875
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2876
    # time before and after executing the request, we can at least have a time
2877
    # window.
2878
    nvinfo_starttime = time.time()
2879
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2880
                                           node_verify_param,
2881
                                           self.cfg.GetClusterName())
2882
    nvinfo_endtime = time.time()
2883

    
2884
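    # Nodes outside the set verified above may still hold LVs belonging to
    # our instances; query just their LV lists so the volume checks below
    # have data for them as well.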
    if self.extra_lv_nodes and vg_name is not None:
2885
      extra_lv_nvinfo = \
2886
          self.rpc.call_node_verify(self.extra_lv_nodes,
2887
                                    {constants.NV_LVLIST: vg_name},
2888
                                    self.cfg.GetClusterName())
2889
    else:
2890
      extra_lv_nvinfo = {}
2891

    
2892
    all_drbd_map = self.cfg.ComputeDRBDMap()
2893

    
2894
    feedback_fn("* Gathering disk information (%s nodes)" %
2895
                len(self.my_node_names))
2896
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2897
                                     self.my_inst_info)
2898

    
2899
    feedback_fn("* Verifying configuration file consistency")
2900

    
2901
    # If not all nodes are being checked, we need to make sure the master node
2902
    # and a non-checked vm_capable node are in the list.
2903
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2904
    if absent_nodes:
2905
      vf_nvinfo = all_nvinfo.copy()
2906
      vf_node_info = list(self.my_node_info.values())
2907
      additional_nodes = []
2908
      if master_node not in self.my_node_info:
2909
        additional_nodes.append(master_node)
2910
        vf_node_info.append(self.all_node_info[master_node])
2911
      # Add the first vm_capable node we find which is not included
2912
      for node in absent_nodes:
2913
        nodeinfo = self.all_node_info[node]
2914
        if nodeinfo.vm_capable and not nodeinfo.offline:
2915
          additional_nodes.append(node)
2916
          vf_node_info.append(self.all_node_info[node])
2917
          break
2918
      key = constants.NV_FILELIST
2919
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2920
                                                 {key: node_verify_param[key]},
2921
                                                 self.cfg.GetClusterName()))
2922
    else:
2923
      vf_nvinfo = all_nvinfo
2924
      vf_node_info = self.my_node_info.values()
2925

    
2926
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2927

    
2928
    feedback_fn("* Verifying node status")
2929

    
2930
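    # reference node image against which the OS list of every other node
    # is compared in _VerifyNodeOS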
    refos_img = None
2931

    
2932
    for node_i in node_data_list:
2933
      node = node_i.name
2934
      nimg = node_image[node]
2935

    
2936
      if node_i.offline:
2937
        if verbose:
2938
          feedback_fn("* Skipping offline node %s" % (node,))
2939
        n_offline += 1
2940
        continue
2941

    
2942
      if node == master_node:
2943
        ntype = "master"
2944
      elif node_i.master_candidate:
2945
        ntype = "master candidate"
2946
      elif node_i.drained:
2947
        ntype = "drained"
2948
        n_drained += 1
2949
      else:
2950
        ntype = "regular"
2951
      if verbose:
2952
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2953

    
2954
      msg = all_nvinfo[node].fail_msg
2955
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2956
               msg)
2957
      if msg:
2958
        nimg.rpc_fail = True
2959
        continue
2960

    
2961
      nresult = all_nvinfo[node].payload
2962

    
2963
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2964
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2965
      self._VerifyNodeNetwork(node_i, nresult)
2966
      self._VerifyNodeUserScripts(node_i, nresult)
2967
      self._VerifyOob(node_i, nresult)
2968

    
2969
      if nimg.vm_capable:
2970
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2971
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2972
                             all_drbd_map)
2973

    
2974
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2975
        self._UpdateNodeInstances(node_i, nresult, nimg)
2976
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2977
        self._UpdateNodeOS(node_i, nresult, nimg)
2978

    
2979
        if not nimg.os_fail:
2980
          if refos_img is None:
2981
            refos_img = nimg
2982
          self._VerifyNodeOS(node_i, nimg, refos_img)
2983
        self._VerifyNodeBridges(node_i, nresult, bridges)
2984

    
2985
        # Check whether all running instances are primary for the node. (This
2986
        # can no longer be done from _VerifyInstance below, since some of the
2987
        # wrong instances could be from other node groups.)
2988
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2989

    
2990
        for inst in non_primary_inst:
2991
          # FIXME: investigate best way to handle offline insts
2992
          if inst.admin_state == constants.ADMINST_OFFLINE:
2993
            if verbose:
2994
              feedback_fn("* Skipping offline instance %s" % inst.name)
2995
            i_offline += 1
2996
            continue
2997
          test = inst in self.all_inst_info
2998
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2999
                   "instance should not run on node %s", node_i.name)
3000
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3001
                   "node is running unknown instance %s", inst)
3002

    
3003
    for node, result in extra_lv_nvinfo.items():
3004
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3005
                              node_image[node], vg_name)
3006

    
3007
    feedback_fn("* Verifying instance status")
3008
    for instance in self.my_inst_names:
3009
      if verbose:
3010
        feedback_fn("* Verifying instance %s" % instance)
3011
      inst_config = self.my_inst_info[instance]
3012
      self._VerifyInstance(instance, inst_config, node_image,
3013
                           instdisk[instance])
3014
      inst_nodes_offline = []
3015

    
3016
      pnode = inst_config.primary_node
3017
      pnode_img = node_image[pnode]
3018
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3019
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
3020
               " primary node failed", instance)
3021

    
3022
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3023
               pnode_img.offline,
3024
               constants.CV_EINSTANCEBADNODE, instance,
3025
               "instance is marked as running and lives on offline node %s",
3026
               inst_config.primary_node)
3027

    
3028
      # If the instance is non-redundant we cannot survive losing its primary
3029
      # node, so we are not N+1 compliant. On the other hand we have no disk
3030
      # templates with more than one secondary so that situation is not well
3031
      # supported either.
3032
      # FIXME: does not support file-backed instances
3033
      if not inst_config.secondary_nodes:
3034
        i_non_redundant.append(instance)
3035

    
3036
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
3037
               constants.CV_EINSTANCELAYOUT,
3038
               instance, "instance has multiple secondary nodes: %s",
3039
               utils.CommaJoin(inst_config.secondary_nodes),
3040
               code=self.ETYPE_WARNING)
3041

    
3042
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
3043
        pnode = inst_config.primary_node
3044
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
3045
        instance_groups = {}
3046

    
3047
        for node in instance_nodes:
3048
          instance_groups.setdefault(self.all_node_info[node].group,
3049
                                     []).append(node)
3050

    
3051
        pretty_list = [
3052
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3053
          # Sort so that we always list the primary node first.
3054
          for group, nodes in sorted(instance_groups.items(),
3055
                                     key=lambda (_, nodes): pnode in nodes,
3056
                                     reverse=True)]
3057

    
3058
        self._ErrorIf(len(instance_groups) > 1,
3059
                      constants.CV_EINSTANCESPLITGROUPS,
3060
                      instance, "instance has primary and secondary nodes in"
3061
                      " different groups: %s", utils.CommaJoin(pretty_list),
3062
                      code=self.ETYPE_WARNING)
3063

    
3064
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3065
        i_non_a_balanced.append(instance)
3066

    
3067
      for snode in inst_config.secondary_nodes:
3068
        s_img = node_image[snode]
3069
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3070
                 snode, "instance %s, connection to secondary node failed",
3071
                 instance)
3072

    
3073
        if s_img.offline:
3074
          inst_nodes_offline.append(snode)
3075

    
3076
      # warn that the instance lives on offline nodes
3077
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3078
               "instance has offline secondary node(s) %s",
3079
               utils.CommaJoin(inst_nodes_offline))
3080
      # ... or ghost/non-vm_capable nodes
3081
      for node in inst_config.all_nodes:
3082
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3083
                 instance, "instance lives on ghost node %s", node)
3084
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3085
                 instance, "instance lives on non-vm_capable node %s", node)
3086

    
3087
    feedback_fn("* Verifying orphan volumes")
3088
    reserved = utils.FieldSet(*cluster.reserved_lvs)
3089

    
3090
    # We will get spurious "unknown volume" warnings if any node of this group
3091
    # is secondary for an instance whose primary is in another group. To avoid
3092
    # them, we find these instances and add their volumes to node_vol_should.
3093
    for inst in self.all_inst_info.values():
3094
      for secondary in inst.secondary_nodes:
3095
        if (secondary in self.my_node_info
3096
            and inst.name not in self.my_inst_info):
3097
          inst.MapLVsByNode(node_vol_should)
3098
          break
3099

    
3100
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3101

    
3102
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3103
      feedback_fn("* Verifying N+1 Memory redundancy")
3104
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3105

    
3106
    feedback_fn("* Other Notes")
3107
    if i_non_redundant:
3108
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3109
                  % len(i_non_redundant))
3110

    
3111
    if i_non_a_balanced:
3112
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3113
                  % len(i_non_a_balanced))
3114

    
3115
    if i_offline:
3116
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3117

    
3118
    if n_offline:
3119
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3120

    
3121
    if n_drained:
3122
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3123

    
3124
    return not self.bad
3125

    
3126
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3127
    """Analyze the post-hooks' result
3128

3129
    This method analyses the hook result, handles it, and sends some
3130
    nicely-formatted feedback back to the user.
3131

3132
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3133
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3134
    @param hooks_results: the results of the multi-node hooks rpc call
3135
    @param feedback_fn: function used to send feedback back to the caller
3136
    @param lu_result: previous Exec result
3137
    @return: the new Exec result, based on the previous result
3138
        and hook results
3139

3140
    """
3141
    # We only really run POST phase hooks, only for non-empty groups,
3142
    # and are only interested in their results
3143
    if not self.my_node_names:
3144
      # empty node group
3145
      pass
3146
    elif phase == constants.HOOKS_PHASE_POST:
3147
      # Used to change hooks' output to proper indentation
3148
      feedback_fn("* Hooks Results")
3149
      assert hooks_results, "invalid result from hooks"
3150

    
3151
      for node_name in hooks_results:
3152
        res = hooks_results[node_name]
3153
        msg = res.fail_msg
3154
        test = msg and not res.offline
3155
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3156
                      "Communication failure in hooks execution: %s", msg)
3157
        if res.offline or msg:
3158
          # No need to investigate payload if node is offline or gave
3159
          # an error.
3160
          continue
3161
        for script, hkr, output in res.payload:
3162
          test = hkr == constants.HKR_FAIL
3163
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3164
                        "Script %s failed, output:", script)
3165
          if test:
3166
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3167
            feedback_fn("%s" % output)
3168
            lu_result = False
3169

    
3170
    return lu_result
3171

    
3172

    
3173
class LUClusterVerifyDisks(NoHooksLU):
3174
  """Verifies the cluster disks status.
3175

3176
  """
3177
  REQ_BGL = False
3178

    
3179
  def ExpandNames(self):
3180
    self.share_locks = _ShareAll()
3181
    self.needed_locks = {
3182
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3183
      }
3184

    
3185
  def Exec(self, feedback_fn):
3186
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3187

    
3188
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3189
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3190
                           for group in group_names])
3191

    
3192

    
3193
class LUGroupVerifyDisks(NoHooksLU):
3194
  """Verifies the status of all disks in a node group.
3195

3196
  """
3197
  REQ_BGL = False
3198

    
3199
  def ExpandNames(self):
3200
    # Raises errors.OpPrereqError on its own if group can't be found
3201
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3202

    
3203
    self.share_locks = _ShareAll()
3204
    self.needed_locks = {
3205
      locking.LEVEL_INSTANCE: [],
3206
      locking.LEVEL_NODEGROUP: [],
3207
      locking.LEVEL_NODE: [],
3208
      }
3209

    
3210
  def DeclareLocks(self, level):
3211
    if level == locking.LEVEL_INSTANCE:
3212
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3213

    
3214
      # Lock instances optimistically, needs verification once node and group
3215
      # locks have been acquired
3216
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3217
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3218

    
3219
    elif level == locking.LEVEL_NODEGROUP:
3220
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3221

    
3222
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3223
        set([self.group_uuid] +
3224
            # Lock all groups used by instances optimistically; this requires
3225
            # going via the node before it's locked, requiring verification
3226
            # later on
3227
            [group_uuid
3228
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3229
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3230

    
3231
    elif level == locking.LEVEL_NODE:
3232
      # This will only lock the nodes in the group to be verified which contain
3233
      # actual instances
3234
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3235
      self._LockInstancesNodes()
3236

    
3237
      # Lock all nodes in group to be verified
3238
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3239
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3240
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3241

    
3242
  def CheckPrereq(self):
3243
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3244
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3245
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3246

    
3247
    assert self.group_uuid in owned_groups
3248

    
3249
    # Check if locked instances are still correct
3250
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3251

    
3252
    # Get instance information
3253
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3254

    
3255
    # Check if node groups for locked instances are still correct
3256
    for (instance_name, inst) in self.instances.items():
3257
      assert owned_nodes.issuperset(inst.all_nodes), \
3258
        "Instance %s's nodes changed while we kept the lock" % instance_name
3259

    
3260
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3261
                                             owned_groups)
3262

    
3263
      assert self.group_uuid in inst_groups, \
3264
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3265

    
3266
  def Exec(self, feedback_fn):
3267
    """Verify integrity of cluster disks.
3268

3269
    @rtype: tuple of three items
3270
    @return: a tuple of (dict of node-to-node_error, list of instances
3271
        which need activate-disks, dict of instance: (node, volume) for
3272
        missing volumes)
3273

3274
    """
3275
    res_nodes = {}
3276
    res_instances = set()
3277
    res_missing = {}
3278

    
3279
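    # Map (node, LV name) pairs to their owning instance for all disks of
    # instances that are administratively up; whatever is left in this dict
    # after querying the nodes is reported as a missing volume.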
    nv_dict = _MapInstanceDisksToNodes([inst
3280
            for inst in self.instances.values()
3281
            if inst.admin_state == constants.ADMINST_UP])
3282

    
3283
    if nv_dict:
3284
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3285
                             set(self.cfg.GetVmCapableNodeList()))
3286

    
3287
      node_lvs = self.rpc.call_lv_list(nodes, [])
3288

    
3289
      for (node, node_res) in node_lvs.items():
3290
        if node_res.offline:
3291
          continue
3292

    
3293
        msg = node_res.fail_msg
3294
        if msg:
3295
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3296
          res_nodes[node] = msg
3297
          continue
3298

    
3299
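        # Drop every LV the node reports from nv_dict; an LV that exists but
        # is offline means its instance needs its disks re-activated.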
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3300
          inst = nv_dict.pop((node, lv_name), None)
3301
          if not (lv_online or inst is None):
3302
            res_instances.add(inst)
3303

    
3304
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3305
      # better
3306
      for key, inst in nv_dict.iteritems():
3307
        res_missing.setdefault(inst, []).append(list(key))
3308

    
3309
    return (res_nodes, list(res_instances), res_missing)
3310

    
3311

    
3312
class LUClusterRepairDiskSizes(NoHooksLU):
3313
  """Verifies the cluster disks sizes.
3314

3315
  """
3316
  REQ_BGL = False
3317

    
3318
  def ExpandNames(self):
3319
    if self.op.instances:
3320
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3321
      self.needed_locks = {
3322
        locking.LEVEL_NODE_RES: [],
3323
        locking.LEVEL_INSTANCE: self.wanted_names,
3324
        }
3325
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3326
    else:
3327
      self.wanted_names = None
3328
      self.needed_locks = {
3329
        locking.LEVEL_NODE_RES: locking.ALL_SET,
3330
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3331
        }
3332
    self.share_locks = {
3333
      locking.LEVEL_NODE_RES: 1,
3334
      locking.LEVEL_INSTANCE: 0,
3335
      }
3336

    
3337
  def DeclareLocks(self, level):
3338
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3339
      self._LockInstancesNodes(primary_only=True, level=level)
3340

    
3341
  def CheckPrereq(self):
3342
    """Check prerequisites.
3343

3344
    This only checks the optional instance list against the existing names.
3345

3346
    """
3347
    if self.wanted_names is None:
3348
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3349

    
3350
    self.wanted_instances = \
3351
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3352

    
3353
  def _EnsureChildSizes(self, disk):
3354
    """Ensure children of the disk have the needed disk size.
3355

3356
    This is valid mainly for DRBD8 and fixes an issue where the
3357
    children have a smaller disk size than the parent.
3358

3359
    @param disk: an L{ganeti.objects.Disk} object
3360

3361
    """
3362
    if disk.dev_type == constants.LD_DRBD8:
3363
      assert disk.children, "Empty children for DRBD8?"
3364
      fchild = disk.children[0]
3365
      mismatch = fchild.size < disk.size
3366
      if mismatch:
3367
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3368
                     fchild.size, disk.size)
3369
        fchild.size = disk.size
3370

    
3371
      # and we recurse on this child only, not on the metadev
3372
      return self._EnsureChildSizes(fchild) or mismatch
3373
    else:
3374
      return False
3375

    
3376
  def Exec(self, feedback_fn):
3377
    """Verify the size of cluster disks.
3378

3379
    """
3380
    # TODO: check child disks too
3381
    # TODO: check differences in size between primary/secondary nodes
3382
    per_node_disks = {}
3383
    for instance in self.wanted_instances:
3384
      pnode = instance.primary_node
3385
      if pnode not in per_node_disks:
3386
        per_node_disks[pnode] = []
3387
      for idx, disk in enumerate(instance.disks):
3388
        per_node_disks[pnode].append((instance, idx, disk))
3389

    
3390
    assert not (frozenset(per_node_disks.keys()) -
3391
                self.owned_locks(locking.LEVEL_NODE_RES)), \
3392
      "Not owning correct locks"
3393
    assert not self.owned_locks(locking.LEVEL_NODE)
3394

    
3395
    changed = []
3396
    for node, dskl in per_node_disks.items():
3397
      newl = [v[2].Copy() for v in dskl]
3398
      for dsk in newl:
3399
        self.cfg.SetDiskID(dsk, node)
3400
      result = self.rpc.call_blockdev_getsize(node, newl)
3401
      if result.fail_msg:
3402
        self.LogWarning("Failure in blockdev_getsize call to node"
3403
                        " %s, ignoring", node)
3404
        continue
3405
      if len(result.payload) != len(dskl):
3406
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3407
                        " result.payload=%s", node, len(dskl), result.payload)
3408
        self.LogWarning("Invalid result from node %s, ignoring node results",
3409
                        node)
3410
        continue
3411
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3412
        if size is None:
3413
          self.LogWarning("Disk %d of instance %s did not return size"
3414
                          " information, ignoring", idx, instance.name)
3415
          continue
3416
        if not isinstance(size, (int, long)):
3417
          self.LogWarning("Disk %d of instance %s did not return valid"
3418
                          " size information, ignoring", idx, instance.name)
3419
          continue
3420
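        # the RPC reports sizes in bytes, while the configuration stores MiB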
        size = size >> 20
3421
        if size != disk.size:
3422
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3423
                       " correcting: recorded %d, actual %d", idx,
3424
                       instance.name, disk.size, size)
3425
          disk.size = size
3426
          self.cfg.Update(instance, feedback_fn)
3427
          changed.append((instance.name, idx, size))
3428
        if self._EnsureChildSizes(disk):
3429
          self.cfg.Update(instance, feedback_fn)
3430
          changed.append((instance.name, idx, disk.size))
3431
    return changed
3432

    
3433

    
3434
class LUClusterRename(LogicalUnit):
3435
  """Rename the cluster.
3436

3437
  """
3438
  HPATH = "cluster-rename"
3439
  HTYPE = constants.HTYPE_CLUSTER
3440

    
3441
  def BuildHooksEnv(self):
3442
    """Build hooks env.
3443

3444
    """
3445
    return {
3446
      "OP_TARGET": self.cfg.GetClusterName(),
3447
      "NEW_NAME": self.op.name,
3448
      }
3449

    
3450
  def BuildHooksNodes(self):
3451
    """Build hooks nodes.
3452

3453
    """
3454
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3455

    
3456
  def CheckPrereq(self):
3457
    """Verify that the passed name is a valid one.
3458

3459
    """
3460
    hostname = netutils.GetHostname(name=self.op.name,
3461
                                    family=self.cfg.GetPrimaryIPFamily())
3462

    
3463
    new_name = hostname.name
3464
    self.ip = new_ip = hostname.ip
3465
    old_name = self.cfg.GetClusterName()
3466
    old_ip = self.cfg.GetMasterIP()
3467
    if new_name == old_name and new_ip == old_ip:
3468
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3469
                                 " cluster has changed",
3470
                                 errors.ECODE_INVAL)
3471
    if new_ip != old_ip:
3472
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3473
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3474
                                   " reachable on the network" %
3475
                                   new_ip, errors.ECODE_NOTUNIQUE)
3476

    
3477
    self.op.name = new_name
3478

    
3479
  def Exec(self, feedback_fn):
3480
    """Rename the cluster.
3481

3482
    """
3483
    clustername = self.op.name
3484
    new_ip = self.ip
3485

    
3486
    # shutdown the master IP
3487
    master_params = self.cfg.GetMasterNetworkParameters()
3488
    ems = self.cfg.GetUseExternalMipScript()
3489
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3490
                                                     master_params, ems)
3491
    result.Raise("Could not disable the master role")
3492

    
3493
    try:
3494
      cluster = self.cfg.GetClusterInfo()
3495
      cluster.cluster_name = clustername
3496
      cluster.master_ip = new_ip
3497
      self.cfg.Update(cluster, feedback_fn)
3498

    
3499
      # update the known hosts file
3500
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3501
      node_list = self.cfg.GetOnlineNodeList()
3502
      try:
3503
        node_list.remove(master_params.name)
3504
      except ValueError:
3505
        pass
3506
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3507
    finally:
3508
      master_params.ip = new_ip
3509
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3510
                                                     master_params, ems)
3511
      msg = result.fail_msg
3512
      if msg:
3513
        self.LogWarning("Could not re-enable the master role on"
3514
                        " the master, please restart manually: %s", msg)
3515

    
3516
    return clustername
3517

    
3518

    
3519
def _ValidateNetmask(cfg, netmask):
3520
  """Checks if a netmask is valid.
3521

3522
  @type cfg: L{config.ConfigWriter}
3523
  @param cfg: The cluster configuration
3524
  @type netmask: int
3525
  @param netmask: the netmask to be verified
3526
  @raise errors.OpPrereqError: if the validation fails
3527

3528
  """
3529
  ip_family = cfg.GetPrimaryIPFamily()
3530
  try:
3531
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3532
  except errors.ProgrammerError:
3533
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
3534
                               ip_family)
3535
  if not ipcls.ValidateNetmask(netmask):
3536
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3537
                                (netmask))
3538

    
3539

    
3540
class LUClusterSetParams(LogicalUnit):
3541
  """Change the parameters of the cluster.
3542

3543
  """
3544
  HPATH = "cluster-modify"
3545
  HTYPE = constants.HTYPE_CLUSTER
3546
  REQ_BGL = False
3547

    
3548
  def CheckArguments(self):
3549
    """Check parameters
3550

3551
    """
3552
    if self.op.uid_pool:
3553
      uidpool.CheckUidPool(self.op.uid_pool)
3554

    
3555
    if self.op.add_uids:
3556
      uidpool.CheckUidPool(self.op.add_uids)
3557

    
3558
    if self.op.remove_uids:
3559
      uidpool.CheckUidPool(self.op.remove_uids)
3560

    
3561
    if self.op.master_netmask is not None:
3562
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3563

    
3564
    if self.op.diskparams:
3565
      for dt_params in self.op.diskparams.values():
3566
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3567

    
3568
  def ExpandNames(self):
3569
    # FIXME: in the future maybe other cluster params won't require checking on
3570
    # all nodes to be modified.
3571
    self.needed_locks = {
3572
      locking.LEVEL_NODE: locking.ALL_SET,
3573
    }
3574
    self.share_locks[locking.LEVEL_NODE] = 1
3575

    
3576
  def BuildHooksEnv(self):
3577
    """Build hooks env.
3578

3579
    """
3580
    return {
3581
      "OP_TARGET": self.cfg.GetClusterName(),
3582
      "NEW_VG_NAME": self.op.vg_name,
3583
      }
3584

    
3585
  def BuildHooksNodes(self):
3586
    """Build hooks nodes.
3587

3588
    """
3589
    mn = self.cfg.GetMasterNode()
3590
    return ([mn], [mn])
3591

    
3592
  def CheckPrereq(self):
3593
    """Check prerequisites.
3594

3595
    This checks whether the given params don't conflict and
3596
    if the given volume group is valid.
3597

3598
    """
3599
    if self.op.vg_name is not None and not self.op.vg_name:
3600
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3601
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3602
                                   " instances exist", errors.ECODE_INVAL)
3603

    
3604
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3605
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3606
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3607
                                   " drbd-based instances exist",
3608
                                   errors.ECODE_INVAL)
3609

    
3610
    node_list = self.owned_locks(locking.LEVEL_NODE)
3611

    
3612
    # if vg_name is not None, check the given volume group on all nodes
3613
    if self.op.vg_name:
3614
      vglist = self.rpc.call_vg_list(node_list)
3615
      for node in node_list:
3616
        msg = vglist[node].fail_msg
3617
        if msg:
3618
          # ignoring down node
3619
          self.LogWarning("Error while gathering data on node %s"
3620
                          " (ignoring node): %s", node, msg)
3621
          continue
3622
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3623
                                              self.op.vg_name,
3624
                                              constants.MIN_VG_SIZE)
3625
        if vgstatus:
3626
          raise errors.OpPrereqError("Error on node '%s': %s" %
3627
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3628

    
3629
    if self.op.drbd_helper:
3630
      # checks given drbd helper on all nodes
3631
      helpers = self.rpc.call_drbd_helper(node_list)
3632
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3633
        if ninfo.offline:
3634
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3635
          continue
3636
        msg = helpers[node].fail_msg
3637
        if msg:
3638
          raise errors.OpPrereqError("Error checking drbd helper on node"
3639
                                     " '%s': %s" % (node, msg),
3640
                                     errors.ECODE_ENVIRON)
3641
        node_helper = helpers[node].payload
3642
        if node_helper != self.op.drbd_helper:
3643
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3644
                                     (node, node_helper), errors.ECODE_ENVIRON)
3645

    
3646
    self.cluster = cluster = self.cfg.GetClusterInfo()
3647
    # validate params changes
3648
    if self.op.beparams:
3649
      objects.UpgradeBeParams(self.op.beparams)
3650
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3651
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3652

    
3653
    if self.op.ndparams:
3654
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3655
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3656

    
3657
      # TODO: we need a more general way to handle resetting
3658
      # cluster-level parameters to default values
3659
      if self.new_ndparams["oob_program"] == "":
3660
        self.new_ndparams["oob_program"] = \
3661
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3662

    
3663
    if self.op.nicparams:
3664
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3665
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3666
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3667
      nic_errors = []
3668

    
3669
      # check all instances for consistency
3670
      for instance in self.cfg.GetAllInstancesInfo().values():
3671
        for nic_idx, nic in enumerate(instance.nics):
3672
          params_copy = copy.deepcopy(nic.nicparams)
3673
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3674

    
3675
          # check parameter syntax
3676
          try:
3677
            objects.NIC.CheckParameterSyntax(params_filled)
3678
          except errors.ConfigurationError, err:
3679
            nic_errors.append("Instance %s, nic/%d: %s" %
3680
                              (instance.name, nic_idx, err))
3681

    
3682
          # if we're moving instances to routed, check that they have an ip
3683
          target_mode = params_filled[constants.NIC_MODE]
3684
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3685
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3686
                              " address" % (instance.name, nic_idx))
3687
      if nic_errors:
3688
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3689
                                   "\n".join(nic_errors))
3690

    
3691
    # hypervisor list/parameters
3692
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3693
    if self.op.hvparams:
3694
      for hv_name, hv_dict in self.op.hvparams.items():
3695
        if hv_name not in self.new_hvparams:
3696
          self.new_hvparams[hv_name] = hv_dict
3697
        else:
3698
          self.new_hvparams[hv_name].update(hv_dict)
3699

    
3700
    # disk template parameters
3701
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3702
    if self.op.diskparams:
3703
      for dt_name, dt_params in self.op.diskparams.items():
3704
        if dt_name not in self.new_diskparams:
3705
          self.new_diskparams[dt_name] = dt_params
3706
        else:
3707
          self.new_diskparams[dt_name].update(dt_params)
3708

    
3709
    # os hypervisor parameters
3710
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3711
    if self.op.os_hvp:
3712
      for os_name, hvs in self.op.os_hvp.items():
3713
        if os_name not in self.new_os_hvp:
3714
          self.new_os_hvp[os_name] = hvs
3715
        else:
3716
          for hv_name, hv_dict in hvs.items():
3717
            if hv_name not in self.new_os_hvp[os_name]:
3718
              self.new_os_hvp[os_name][hv_name] = hv_dict
3719
            else:
3720
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3721

    
3722
    # os parameters
3723
    self.new_osp = objects.FillDict(cluster.osparams, {})
3724
    if self.op.osparams:
3725
      for os_name, osp in self.op.osparams.items():
3726
        if os_name not in self.new_osp:
3727
          self.new_osp[os_name] = {}
3728

    
3729
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3730
                                                  use_none=True)
3731

    
3732
        if not self.new_osp[os_name]:
3733
          # we removed all parameters
3734
          del self.new_osp[os_name]
3735
        else:
3736
          # check the parameter validity (remote check)
3737
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3738
                         os_name, self.new_osp[os_name])
3739

    
3740
    # changes to the hypervisor list
3741
    if self.op.enabled_hypervisors is not None:
3742
      self.hv_list = self.op.enabled_hypervisors
3743
      for hv in self.hv_list:
3744
        # if the hypervisor doesn't already exist in the cluster
3745
        # hvparams, we initialize it to empty, and then (in both
3746
        # cases) we make sure to fill the defaults, as we might not
3747
        # have a complete defaults list if the hypervisor wasn't
3748
        # enabled before
3749
        if hv not in new_hvp:
3750
          new_hvp[hv] = {}
3751
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3752
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3753
    else:
3754
      self.hv_list = cluster.enabled_hypervisors
3755

    
3756
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3757
      # either the enabled list has changed, or the parameters have, validate
3758
      for hv_name, hv_params in self.new_hvparams.items():
3759
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3760
            (self.op.enabled_hypervisors and
3761
             hv_name in self.op.enabled_hypervisors)):
3762
          # either this is a new hypervisor, or its parameters have changed
3763
          hv_class = hypervisor.GetHypervisor(hv_name)
3764
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3765
          hv_class.CheckParameterSyntax(hv_params)
3766
          _CheckHVParams(self, node_list, hv_name, hv_params)
3767

    
3768
    if self.op.os_hvp:
3769
      # no need to check any newly-enabled hypervisors, since the
3770
      # defaults have already been checked in the above code-block
3771
      for os_name, os_hvp in self.new_os_hvp.items():
3772
        for hv_name, hv_params in os_hvp.items():
3773
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3774
          # we need to fill in the new os_hvp on top of the actual hv_p
3775
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3776
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3777
          hv_class = hypervisor.GetHypervisor(hv_name)
3778
          hv_class.CheckParameterSyntax(new_osp)
3779
          _CheckHVParams(self, node_list, hv_name, new_osp)
3780

    
3781
    if self.op.default_iallocator:
3782
      alloc_script = utils.FindFile(self.op.default_iallocator,
3783
                                    constants.IALLOCATOR_SEARCH_PATH,
3784
                                    os.path.isfile)
3785
      if alloc_script is None:
3786
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3787
                                   " specified" % self.op.default_iallocator,
3788
                                   errors.ECODE_INVAL)
3789

    
3790
  def Exec(self, feedback_fn):
3791
    """Change the parameters of the cluster.
3792

3793
    """
3794
    if self.op.vg_name is not None:
3795
      new_volume = self.op.vg_name
3796
      if not new_volume:
3797
        new_volume = None
3798
      if new_volume != self.cfg.GetVGName():
3799
        self.cfg.SetVGName(new_volume)
3800
      else:
3801
        feedback_fn("Cluster LVM configuration already in desired"
3802
                    " state, not changing")
3803
    if self.op.drbd_helper is not None:
3804
      new_helper = self.op.drbd_helper
3805
      if not new_helper:
3806
        new_helper = None
3807
      if new_helper != self.cfg.GetDRBDHelper():
3808
        self.cfg.SetDRBDHelper(new_helper)
3809
      else:
3810
        feedback_fn("Cluster DRBD helper already in desired state,"
3811
                    " not changing")
3812
    if self.op.hvparams:
3813
      self.cluster.hvparams = self.new_hvparams
3814
    if self.op.os_hvp:
3815
      self.cluster.os_hvp = self.new_os_hvp
3816
    if self.op.enabled_hypervisors is not None:
3817
      self.cluster.hvparams = self.new_hvparams
3818
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3819
    if self.op.beparams:
3820
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3821
    if self.op.nicparams:
3822
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3823
    if self.op.osparams:
3824
      self.cluster.osparams = self.new_osp
3825
    if self.op.ndparams:
3826
      self.cluster.ndparams = self.new_ndparams
3827
    if self.op.diskparams:
3828
      self.cluster.diskparams = self.new_diskparams
3829

    
3830
    if self.op.candidate_pool_size is not None:
3831
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3832
      # we need to update the pool size here, otherwise the save will fail
3833
      _AdjustCandidatePool(self, [])
3834

    
3835
    if self.op.maintain_node_health is not None:
3836
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3837
        feedback_fn("Note: CONFD was disabled at build time, node health"
3838
                    " maintenance is not useful (still enabling it)")
3839
      self.cluster.maintain_node_health = self.op.maintain_node_health
3840

    
3841
    if self.op.prealloc_wipe_disks is not None:
3842
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3843

    
3844
    if self.op.add_uids is not None:
3845
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3846

    
3847
    if self.op.remove_uids is not None:
3848
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3849

    
3850
    if self.op.uid_pool is not None:
3851
      self.cluster.uid_pool = self.op.uid_pool
3852

    
3853
    if self.op.default_iallocator is not None:
3854
      self.cluster.default_iallocator = self.op.default_iallocator
3855

    
3856
    if self.op.reserved_lvs is not None:
3857
      self.cluster.reserved_lvs = self.op.reserved_lvs
3858

    
3859
    if self.op.use_external_mip_script is not None:
3860
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3861

    
3862
    def helper_os(aname, mods, desc):
3863
      desc += " OS list"
3864
      lst = getattr(self.cluster, aname)
3865
      for key, val in mods:
3866
        if key == constants.DDM_ADD:
3867
          if val in lst:
3868
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3869
          else:
3870
            lst.append(val)
3871
        elif key == constants.DDM_REMOVE:
3872
          if val in lst:
3873
            lst.remove(val)
3874
          else:
3875
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3876
        else:
3877
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3878

    
3879
    if self.op.hidden_os:
3880
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3881

    
3882
    if self.op.blacklisted_os:
3883
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3884

    
3885
    if self.op.master_netdev:
3886
      master_params = self.cfg.GetMasterNetworkParameters()
3887
      ems = self.cfg.GetUseExternalMipScript()
3888
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3889
                  self.cluster.master_netdev)
3890
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3891
                                                       master_params, ems)
3892
      result.Raise("Could not disable the master ip")
3893
      feedback_fn("Changing master_netdev from %s to %s" %
3894
                  (master_params.netdev, self.op.master_netdev))
3895
      self.cluster.master_netdev = self.op.master_netdev
3896

    
3897
    if self.op.master_netmask:
3898
      master_params = self.cfg.GetMasterNetworkParameters()
3899
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3900
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3901
                                                        master_params.netmask,
3902
                                                        self.op.master_netmask,
3903
                                                        master_params.ip,
3904
                                                        master_params.netdev)
3905
      if result.fail_msg:
3906
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3907
        feedback_fn(msg)
3908

    
3909
      self.cluster.master_netmask = self.op.master_netmask
3910

    
3911
    self.cfg.Update(self.cluster, feedback_fn)
3912

    
3913
    if self.op.master_netdev:
3914
      master_params = self.cfg.GetMasterNetworkParameters()
3915
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3916
                  self.op.master_netdev)
3917
      ems = self.cfg.GetUseExternalMipScript()
3918
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3919
                                                     master_params, ems)
3920
      if result.fail_msg:
3921
        self.LogWarning("Could not re-enable the master ip on"
3922
                        " the master, please restart manually: %s",
3923
                        result.fail_msg)
3924

    
3925

    
3926
def _UploadHelper(lu, nodes, fname):
3927
  """Helper for uploading a file and showing warnings.
3928

3929
  """
3930
  if os.path.exists(fname):
3931
    result = lu.rpc.call_upload_file(nodes, fname)
3932
    for to_node, to_result in result.items():
3933
      msg = to_result.fail_msg
3934
      if msg:
3935
        msg = ("Copy of file %s to node %s failed: %s" %
3936
               (fname, to_node, msg))
3937
        lu.proc.LogWarning(msg)
3938

    
3939

    
3940
def _ComputeAncillaryFiles(cluster, redist):
3941
  """Compute files external to Ganeti which need to be consistent.
3942

3943
  @type redist: boolean
3944
  @param redist: Whether to include files which need to be redistributed
3945

3946
  """
3947
  # Compute files for all nodes
3948
  files_all = set([
3949
    constants.SSH_KNOWN_HOSTS_FILE,
3950
    constants.CONFD_HMAC_KEY,
3951
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3952
    constants.SPICE_CERT_FILE,
3953
    constants.SPICE_CACERT_FILE,
3954
    constants.RAPI_USERS_FILE,
3955
    ])
3956

    
3957
  if not redist:
3958
    files_all.update(constants.ALL_CERT_FILES)
3959
    files_all.update(ssconf.SimpleStore().GetFileList())
3960
  else:
3961
    # we need to ship at least the RAPI certificate
3962
    files_all.add(constants.RAPI_CERT_FILE)
3963

    
3964
  if cluster.modify_etc_hosts:
3965
    files_all.add(constants.ETC_HOSTS)
3966

    
3967
  # Files which are optional, these must:
3968
  # - be present in one other category as well
3969
  # - either exist or not exist on all nodes of that category (mc, vm all)
3970
  files_opt = set([
3971
    constants.RAPI_USERS_FILE,
3972
    ])
3973

    
3974
  # Files which should only be on master candidates
3975
  files_mc = set()
3976

    
3977
  if not redist:
3978
    files_mc.add(constants.CLUSTER_CONF_FILE)
3979

    
3980
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3981
    # replication
3982
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3983

    
3984
  # Files which should only be on VM-capable nodes
3985
  files_vm = set(filename
3986
    for hv_name in cluster.enabled_hypervisors
3987
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3988

    
3989
  files_opt |= set(filename
3990
    for hv_name in cluster.enabled_hypervisors
3991
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3992

    
3993
  # Filenames in each category must be unique
3994
  all_files_set = files_all | files_mc | files_vm
3995
  assert (len(all_files_set) ==
3996
          sum(map(len, [files_all, files_mc, files_vm]))), \
3997
         "Found file listed in more than one file list"
3998

    
3999
  # Optional files must be present in one other category
4000
  assert all_files_set.issuperset(files_opt), \
4001
         "Optional file not in a different required list"
4002

    
4003
  return (files_all, files_opt, files_mc, files_vm)
4004

    
4005

    
4006
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4007
  """Distribute additional files which are part of the cluster configuration.
4008

4009
  ConfigWriter takes care of distributing the config and ssconf files, but
4010
  there are more files which should be distributed to all nodes. This function
4011
  makes sure those are copied.
4012

4013
  @param lu: calling logical unit
4014
  @param additional_nodes: list of nodes not in the config to distribute to
4015
  @type additional_vm: boolean
4016
  @param additional_vm: whether the additional nodes are vm-capable or not
4017

4018
  """
4019
  # Gather target nodes
4020
  cluster = lu.cfg.GetClusterInfo()
4021
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4022

    
4023
  online_nodes = lu.cfg.GetOnlineNodeList()
4024
  vm_nodes = lu.cfg.GetVmCapableNodeList()
4025

    
4026
  if additional_nodes is not None:
4027
    online_nodes.extend(additional_nodes)
4028
    if additional_vm:
4029
      vm_nodes.extend(additional_nodes)
4030

    
4031
  # Never distribute to master node
4032
  for nodelist in [online_nodes, vm_nodes]:
4033
    if master_info.name in nodelist:
4034
      nodelist.remove(master_info.name)
4035

    
4036
  # Gather file lists
4037
  (files_all, _, files_mc, files_vm) = \
4038
    _ComputeAncillaryFiles(cluster, True)
4039

    
4040
  # Never re-distribute configuration file from here
4041
  assert not (constants.CLUSTER_CONF_FILE in files_all or
4042
              constants.CLUSTER_CONF_FILE in files_vm)
4043
  assert not files_mc, "Master candidates not handled in this function"
4044

    
4045
  filemap = [
4046
    (online_nodes, files_all),
4047
    (vm_nodes, files_vm),
4048
    ]
4049

    
4050
  # Upload the files
4051
  for (node_list, files) in filemap:
4052
    for fname in files:
4053
      _UploadHelper(lu, node_list, fname)
4054

    
4055

    
4056
class LUClusterRedistConf(NoHooksLU):
4057
  """Force the redistribution of cluster configuration.
4058

4059
  This is a very simple LU.
4060

4061
  """
4062
  REQ_BGL = False
4063

    
4064
  def ExpandNames(self):
4065
    self.needed_locks = {
4066
      locking.LEVEL_NODE: locking.ALL_SET,
4067
    }
4068
    self.share_locks[locking.LEVEL_NODE] = 1
4069

    
4070
  def Exec(self, feedback_fn):
4071
    """Redistribute the configuration.
4072

4073
    """
4074
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4075
    _RedistributeAncillaryFiles(self)
4076

    
4077

    
4078
class LUClusterActivateMasterIp(NoHooksLU):
4079
  """Activate the master IP on the master node.
4080

4081
  """
4082
  def Exec(self, feedback_fn):
4083
    """Activate the master IP.
4084

4085
    """
4086
    master_params = self.cfg.GetMasterNetworkParameters()
4087
    ems = self.cfg.GetUseExternalMipScript()
4088
    result = self.rpc.call_node_activate_master_ip(master_params.name,
4089
                                                   master_params, ems)
4090
    result.Raise("Could not activate the master IP")
4091

    
4092

    
4093
class LUClusterDeactivateMasterIp(NoHooksLU):
4094
  """Deactivate the master IP on the master node.
4095

4096
  """
4097
  def Exec(self, feedback_fn):
4098
    """Deactivate the master IP.
4099

4100
    """
4101
    master_params = self.cfg.GetMasterNetworkParameters()
4102
    ems = self.cfg.GetUseExternalMipScript()
4103
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4104
                                                     master_params, ems)
4105
    result.Raise("Could not deactivate the master IP")
4106

    
4107

    
4108
def _WaitForSync(lu, instance, disks=None, oneshot=False):
4109
  """Sleep and poll for an instance's disk to sync.
4110

4111
  """
4112
  if not instance.disks or disks is not None and not disks:
4113
    return True
4114

    
4115
  disks = _ExpandCheckDisks(instance, disks)
4116

    
4117
  if not oneshot:
4118
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4119

    
4120
  node = instance.primary_node
4121

    
4122
  for dev in disks:
4123
    lu.cfg.SetDiskID(dev, node)
4124

    
4125
  # TODO: Convert to utils.Retry
4126

    
4127
  retries = 0
4128
  degr_retries = 10 # in seconds, as we sleep 1 second each time
4129
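  # Poll the primary node once per iteration: give up after 10 consecutive
  # RPC failures, keep looping while any disk still reports a sync
  # percentage, and allow a few 1-second retries when the disks are done
  # but still flagged as degraded.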
  while True:
4130
    max_time = 0
4131
    done = True
4132
    cumul_degraded = False
4133
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4134
    msg = rstats.fail_msg
4135
    if msg:
4136
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4137
      retries += 1
4138
      if retries >= 10:
4139
        raise errors.RemoteError("Can't contact node %s for mirror data,"
4140
                                 " aborting." % node)
4141
      time.sleep(6)
4142
      continue
4143
    rstats = rstats.payload
4144
    retries = 0
4145
    for i, mstat in enumerate(rstats):
4146
      if mstat is None:
4147
        lu.LogWarning("Can't compute data for node %s/%s",
4148
                      node, disks[i].iv_name)
4149
        continue
4150

    
4151
      cumul_degraded = (cumul_degraded or
4152
                        (mstat.is_degraded and mstat.sync_percent is None))
4153
      if mstat.sync_percent is not None:
4154
        done = False
4155
        if mstat.estimated_time is not None:
4156
          rem_time = ("%s remaining (estimated)" %
4157
                      utils.FormatSeconds(mstat.estimated_time))
4158
          max_time = mstat.estimated_time
4159
        else:
4160
          rem_time = "no time estimate"
4161
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4162
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4163

    
4164
    # if we're done but degraded, let's do a few small retries, to
4165
    # make sure we see a stable and not transient situation; therefore
4166
    # we force restart of the loop
4167
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4168
      logging.info("Degraded disks found, %d retries left", degr_retries)
4169
      degr_retries -= 1
4170
      time.sleep(1)
4171
      continue
4172

    
4173
    if done or oneshot:
4174
      break
4175

    
4176
    time.sleep(min(60, max_time))
4177

    
4178
  if done:
4179
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4180
  return not cumul_degraded
4181

    
4182

    
4183
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


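# Illustrative usage sketch (assumed, not from the surrounding code): a
# migration or failover check could verify both the overall and the local
# (ldisk) status of every disk on the target node, e.g.:
#
#   for dev in instance.disks:
#     if not _CheckDiskConsistency(self, dev, target_node, False, ldisk=True):
#       raise errors.OpExecError("Disk %s is degraded on target node" %
#                                dev.iv_name)

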
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


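# Note on the OOB result format (summarizing Exec above): one list is returned
# per node, starting with the node name and followed by either the payload or
# a status-only entry, e.g. (hypothetical values):
#
#   [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, {"powered": True})],
#    [(constants.RS_NORMAL, "node2"), (constants.RS_UNAVAIL, None)]]
#
# where the second node has no OOB program configured.

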
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


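# Worked example of the aggregation above (hypothetical data): if node1
# reports variants {"lenny", "squeeze"} for an OS and node2 reports only
# {"squeeze"}, the OS keeps just the intersection {"squeeze"}; and the OS is
# marked invalid as soon as any node's first entry has a bad status.

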
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


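# Example of the filter built above (hypothetical input): for
# output_fields=["name", "variants"] and no names, _BuildFilter returns
# roughly
#
#   [qlang.OP_AND, [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#                  [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#                  [qlang.OP_TRUE, "valid"]]
#
# i.e. hidden, blacklisted and invalid OSes are filtered out unless those
# fields were requested explicitly.

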
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


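# Usage note (illustrative): the live fields computed above back commands
# such as "gnt-node list" with extra output fields requested via -o; only
# when such non-static (NQ_LIVE) fields are requested with locking enabled
# are the node locks declared in ExpandNames actually acquired.

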
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


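# Example of the rows produced above (hypothetical volume): with
# output_fields=["node", "name", "size", "instance"] a single LV might be
# reported as ["node1.example.com", "inst1-disk0", "10240", "inst1"]; every
# value is stringified when appended to node_output.

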
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


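# Note (summarizing the code above): SF_NAME is always fetched so the rows of
# each node can be keyed and sorted by name, while SF_NODE and SF_TYPE are
# known locally and are therefore stripped from the field list sent to
# call_storage_list and filled in when the output rows are assembled.

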
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


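# Note (summarizing the code above): live_data only contains instances found
# running on their configured primary node; instances reported by a different
# node end up in wrongnode_inst, and nodes whose RPC failed are recorded in
# bad_nodes/offline_nodes, so the query layer can distinguish missing data
# from instances that are genuinely stopped.

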
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the given node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


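# Illustrative opcode usage (a sketch; the exact constants and field names
# are assumptions based on the checks above): modifying a storage unit boils
# down to something like
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sda3",
#                                    changes={constants.SF_ALLOCATABLE: False})
#
# which this LU validates against MODIFIABLE_STORAGE_FIELDS and turns into a
# single call_storage_modify RPC.

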
class LUNodeAdd(LogicalUnit):
5117
  """Logical unit for adding node to the cluster.
5118

5119
  """
5120
  HPATH = "node-add"
5121
  HTYPE = constants.HTYPE_NODE
5122
  _NFLAGS = ["master_capable", "vm_capable"]
5123

    
5124
  def CheckArguments(self):
5125
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5126
    # validate/normalize the node name
5127
    self.hostname = netutils.GetHostname(name=self.op.node_name,
5128
                                         family=self.primary_ip_family)
5129
    self.op.node_name = self.hostname.name
5130

    
5131
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5132
      raise errors.OpPrereqError("Cannot readd the master node",
5133
                                 errors.ECODE_STATE)
5134

    
5135
    if self.op.readd and self.op.group:
5136
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
5137
                                 " being readded", errors.ECODE_INVAL)
5138

    
5139
  def BuildHooksEnv(self):
5140
    """Build hooks env.
5141

5142
    This will run on all nodes before, and on all nodes + the new node after.
5143

5144
    """
5145
    return {
5146
      "OP_TARGET": self.op.node_name,
5147
      "NODE_NAME": self.op.node_name,
5148
      "NODE_PIP": self.op.primary_ip,
5149
      "NODE_SIP": self.op.secondary_ip,
5150
      "MASTER_CAPABLE": str(self.op.master_capable),
5151
      "VM_CAPABLE": str(self.op.vm_capable),
5152
      }
5153

    
5154
  def BuildHooksNodes(self):
5155
    """Build hooks nodes.
5156

5157
    """
5158
    # Exclude added node
5159
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5160
    post_nodes = pre_nodes + [self.op.node_name, ]
5161

    
5162
    return (pre_nodes, post_nodes)
5163

    
5164
  def CheckPrereq(self):
5165
    """Check prerequisites.
5166

5167
    This checks:
5168
     - the new node is not already in the config
5169
     - it is resolvable
5170
     - its parameters (single/dual homed) matches the cluster
5171

5172
    Any errors are signaled by raising errors.OpPrereqError.
5173

5174
    """
5175
    cfg = self.cfg
5176
    hostname = self.hostname
5177
    node = hostname.name
5178
    primary_ip = self.op.primary_ip = hostname.ip
5179
    if self.op.secondary_ip is None:
5180
      if self.primary_ip_family == netutils.IP6Address.family:
5181
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5182
                                   " IPv4 address must be given as secondary",
5183
                                   errors.ECODE_INVAL)
5184
      self.op.secondary_ip = primary_ip
5185

    
5186
    secondary_ip = self.op.secondary_ip
5187
    if not netutils.IP4Address.IsValid(secondary_ip):
5188
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5189
                                 " address" % secondary_ip, errors.ECODE_INVAL)
5190

    
5191
    node_list = cfg.GetNodeList()
5192
    if not self.op.readd and node in node_list:
5193
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5194
                                 node, errors.ECODE_EXISTS)
5195
    elif self.op.readd and node not in node_list:
5196
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5197
                                 errors.ECODE_NOENT)
5198

    
5199
    self.changed_primary_ip = False
5200

    
5201
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5202
      if self.op.readd and node == existing_node_name:
5203
        if existing_node.secondary_ip != secondary_ip:
5204
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5205
                                     " address configuration as before",
5206
                                     errors.ECODE_INVAL)
5207
        if existing_node.primary_ip != primary_ip:
5208
          self.changed_primary_ip = True
5209

    
5210
        continue
5211

    
5212
      if (existing_node.primary_ip == primary_ip or
5213
          existing_node.secondary_ip == primary_ip or
5214
          existing_node.primary_ip == secondary_ip or
5215
          existing_node.secondary_ip == secondary_ip):
5216
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5217
                                   " existing node %s" % existing_node.name,
5218
                                   errors.ECODE_NOTUNIQUE)
5219

    
5220
    # After this 'if' block, None is no longer a valid value for the
5221
    # _capable op attributes
5222
    if self.op.readd:
5223
      old_node = self.cfg.GetNodeInfo(node)
5224
      assert old_node is not None, "Can't retrieve locked node %s" % node
5225
      for attr in self._NFLAGS:
5226
        if getattr(self.op, attr) is None:
5227
          setattr(self.op, attr, getattr(old_node, attr))
5228
    else:
5229
      for attr in self._NFLAGS:
5230
        if getattr(self.op, attr) is None:
5231
          setattr(self.op, attr, True)
5232

    
5233
    if self.op.readd and not self.op.vm_capable:
5234
      pri, sec = cfg.GetNodeInstances(node)
5235
      if pri or sec:
5236
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5237
                                   " flag set to false, but it already holds"
5238
                                   " instances" % node,
5239
                                   errors.ECODE_STATE)
5240

    
5241
    # check that the type of the node (single versus dual homed) is the
5242
    # same as for the master
5243
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5244
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5245
    newbie_singlehomed = secondary_ip == primary_ip
5246
    if master_singlehomed != newbie_singlehomed:
5247
      if master_singlehomed:
5248
        raise errors.OpPrereqError("The master has no secondary ip but the"
5249
                                   " new node has one",
5250
                                   errors.ECODE_INVAL)
5251
      else:
5252
        raise errors.OpPrereqError("The master has a secondary ip but the"
5253
                                   " new node doesn't have one",
5254
                                   errors.ECODE_INVAL)
5255

    
5256
    # checks reachability
5257
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5258
      raise errors.OpPrereqError("Node not reachable by ping",
5259
                                 errors.ECODE_ENVIRON)
5260

    
5261
    if not newbie_singlehomed:
5262
      # check reachability from my secondary ip to newbie's secondary ip
5263
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5264
                           source=myself.secondary_ip):
5265
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5266
                                   " based ping to node daemon port",
5267
                                   errors.ECODE_ENVIRON)
5268

    
5269
    if self.op.readd:
5270
      exceptions = [node]
5271
    else:
5272
      exceptions = []
5273

    
5274
    if self.op.master_capable:
5275
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5276
    else:
5277
      self.master_candidate = False
5278

    
5279
    if self.op.readd:
5280
      self.new_node = old_node
5281
    else:
5282
      node_group = cfg.LookupNodeGroup(self.op.group)
5283
      self.new_node = objects.Node(name=node,
5284
                                   primary_ip=primary_ip,
5285
                                   secondary_ip=secondary_ip,
5286
                                   master_candidate=self.master_candidate,
5287
                                   offline=False, drained=False,
5288
                                   group=node_group)
5289

    
5290
    if self.op.ndparams:
5291
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5292

    
5293
  def Exec(self, feedback_fn):
5294
    """Adds the new node to the cluster.
5295

5296
    """
5297
    new_node = self.new_node
5298
    node = new_node.name
5299

    
5300
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5301
      "Not owning BGL"
5302

    
5303
    # We adding a new node so we assume it's powered
5304
    new_node.powered = True
5305

    
5306
    # for re-adds, reset the offline/drained/master-candidate flags;
5307
    # we need to reset here, otherwise offline would prevent RPC calls
5308
    # later in the procedure; this also means that if the re-add
5309
    # fails, we are left with a non-offlined, broken node
5310
    if self.op.readd:
5311
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5312
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5313
      # if we demote the node, we do cleanup later in the procedure
5314
      new_node.master_candidate = self.master_candidate
5315
      if self.changed_primary_ip:
5316
        new_node.primary_ip = self.op.primary_ip
5317

    
5318
    # copy the master/vm_capable flags
5319
    for attr in self._NFLAGS:
5320
      setattr(new_node, attr, getattr(self.op, attr))
5321

    
5322
    # notify the user about any possible mc promotion
5323
    if new_node.master_candidate:
5324
      self.LogInfo("Node will be a master candidate")
5325

    
5326
    if self.op.ndparams:
5327
      new_node.ndparams = self.op.ndparams
5328
    else:
5329
      new_node.ndparams = {}
5330

    
5331
    # check connectivity
5332
    result = self.rpc.call_version([node])[node]
5333
    result.Raise("Can't get version information from node %s" % node)
5334
    if constants.PROTOCOL_VERSION == result.payload:
5335
      logging.info("Communication to node %s fine, sw version %s match",
5336
                   node, result.payload)
5337
    else:
5338
      raise errors.OpExecError("Version mismatch master version %s,"
5339
                               " node version %s" %
5340
                               (constants.PROTOCOL_VERSION, result.payload))
5341

    
5342
    # Add node to our /etc/hosts, and add key to known_hosts
5343
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5344
      master_node = self.cfg.GetMasterNode()
5345
      result = self.rpc.call_etc_hosts_modify(master_node,
5346
                                              constants.ETC_HOSTS_ADD,
5347
                                              self.hostname.name,
5348
                                              self.hostname.ip)
5349
      result.Raise("Can't update hosts file with new host data")
5350

    
5351
    if new_node.secondary_ip != new_node.primary_ip:
5352
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5353
                               False)
5354

    
5355
    node_verify_list = [self.cfg.GetMasterNode()]
5356
    node_verify_param = {
5357
      constants.NV_NODELIST: ([node], {}),
5358
      # TODO: do a node-net-test as well?
5359
    }
5360

    
5361
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5362
                                       self.cfg.GetClusterName())
5363
    for verifier in node_verify_list:
5364
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5365
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5366
      if nl_payload:
5367
        for failed in nl_payload:
5368
          feedback_fn("ssh/hostname verification failed"
5369
                      " (checking from %s): %s" %
5370
                      (verifier, nl_payload[failed]))
5371
        raise errors.OpExecError("ssh/hostname verification failed")
5372

    
5373
    if self.op.readd:
5374
      _RedistributeAncillaryFiles(self)
5375
      self.context.ReaddNode(new_node)
5376
      # make sure we redistribute the config
5377
      self.cfg.Update(new_node, feedback_fn)
5378
      # and make sure the new node will not have old files around
5379
      if not new_node.master_candidate:
5380
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5381
        msg = result.fail_msg
5382
        if msg:
5383
          self.LogWarning("Node failed to demote itself from master"
5384
                          " candidate status: %s" % msg)
5385
    else:
5386
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5387
                                  additional_vm=self.op.vm_capable)
5388
      self.context.AddNode(new_node, self.proc.GetECId())
5389

    
5390

    
5391
class LUNodeSetParams(LogicalUnit):
5392
  """Modifies the parameters of a node.
5393

5394
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5395
      to the node role (as _ROLE_*)
5396
  @cvar _R2F: a dictionary from node role to tuples of flags
5397
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5398

5399
  """
5400
  HPATH = "node-modify"
5401
  HTYPE = constants.HTYPE_NODE
5402
  REQ_BGL = False
5403
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5404
  _F2R = {
5405
    (True, False, False): _ROLE_CANDIDATE,
5406
    (False, True, False): _ROLE_DRAINED,
5407
    (False, False, True): _ROLE_OFFLINE,
5408
    (False, False, False): _ROLE_REGULAR,
5409
    }
5410
  _R2F = dict((v, k) for k, v in _F2R.items())
5411
  _FLAGS = ["master_candidate", "drained", "offline"]
5412

    
5413
  def CheckArguments(self):
5414
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5415
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5416
                self.op.master_capable, self.op.vm_capable,
5417
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5418
                self.op.disk_state]
5419
    if all_mods.count(None) == len(all_mods):
5420
      raise errors.OpPrereqError("Please pass at least one modification",
5421
                                 errors.ECODE_INVAL)
5422
    if all_mods.count(True) > 1:
5423
      raise errors.OpPrereqError("Can't set the node into more than one"
5424
                                 " state at the same time",
5425
                                 errors.ECODE_INVAL)
5426

    
5427
    # Boolean value that tells us whether we might be demoting from MC
5428
    self.might_demote = (self.op.master_candidate == False or
5429
                         self.op.offline == True or
5430
                         self.op.drained == True or
5431
                         self.op.master_capable == False)
5432

    
5433
    if self.op.secondary_ip:
5434
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5435
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5436
                                   " address" % self.op.secondary_ip,
5437
                                   errors.ECODE_INVAL)
5438

    
5439
    self.lock_all = self.op.auto_promote and self.might_demote
5440
    self.lock_instances = self.op.secondary_ip is not None
5441

    
5442
  def _InstanceFilter(self, instance):
5443
    """Filter for getting affected instances.
5444

5445
    """
5446
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5447
            self.op.node_name in instance.all_nodes)
5448

    
5449
  def ExpandNames(self):
5450
    if self.lock_all:
5451
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5452
    else:
5453
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5454

    
5455
    # Since modifying a node can have severe effects on currently running
5456
    # operations the resource lock is at least acquired in shared mode
5457
    self.needed_locks[locking.LEVEL_NODE_RES] = \
5458
      self.needed_locks[locking.LEVEL_NODE]
5459

    
5460
    # Get node resource and instance locks in shared mode; they are not used
5461
    # for anything but read-only access
5462
    self.share_locks[locking.LEVEL_NODE_RES] = 1
5463
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5464

    
5465
    if self.lock_instances:
5466
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5467
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5468

    
5469
  def BuildHooksEnv(self):
5470
    """Build hooks env.
5471

5472
    This runs on the master node.
5473

5474
    """
5475
    return {
5476
      "OP_TARGET": self.op.node_name,
5477
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5478
      "OFFLINE": str(self.op.offline),
5479
      "DRAINED": str(self.op.drained),
5480
      "MASTER_CAPABLE": str(self.op.master_capable),
5481
      "VM_CAPABLE": str(self.op.vm_capable),
5482
      }
5483

    
5484
  def BuildHooksNodes(self):
5485
    """Build hooks nodes.
5486

5487
    """
5488
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5489
    return (nl, nl)
5490

    
5491
  def CheckPrereq(self):
5492
    """Check prerequisites.
5493

5494
    This only checks the instance list against the existing names.
5495

5496
    """
5497
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5498

    
5499
    if self.lock_instances:
5500
      affected_instances = \
5501
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5502

    
5503
      # Verify instance locks
5504
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5505
      wanted_instances = frozenset(affected_instances.keys())
5506
      if wanted_instances - owned_instances:
5507
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5508
                                   " secondary IP address have changed since"
5509
                                   " locks were acquired, wanted '%s', have"
5510
                                   " '%s'; retry the operation" %
5511
                                   (self.op.node_name,
5512
                                    utils.CommaJoin(wanted_instances),
5513
                                    utils.CommaJoin(owned_instances)),
5514
                                   errors.ECODE_STATE)
5515
    else:
5516
      affected_instances = None
5517

    
5518
    if (self.op.master_candidate is not None or
5519
        self.op.drained is not None or
5520
        self.op.offline is not None):
5521
      # we can't change the master's node flags
5522
      if self.op.node_name == self.cfg.GetMasterNode():
5523
        raise errors.OpPrereqError("The master role can be changed"
5524
                                   " only via master-failover",
5525
                                   errors.ECODE_INVAL)
5526

    
5527
    if self.op.master_candidate and not node.master_capable:
5528
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5529
                                 " it a master candidate" % node.name,
5530
                                 errors.ECODE_STATE)
5531

    
5532
    if self.op.vm_capable == False:
5533
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5534
      if ipri or isec:
5535
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5536
                                   " the vm_capable flag" % node.name,
5537
                                   errors.ECODE_STATE)
5538

    
5539
    if node.master_candidate and self.might_demote and not self.lock_all:
5540
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5541
      # check if after removing the current node, we're missing master
5542
      # candidates
5543
      (mc_remaining, mc_should, _) = \
5544
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5545
      if mc_remaining < mc_should:
5546
        raise errors.OpPrereqError("Not enough master candidates, please"
5547
                                   " pass auto promote option to allow"
5548
                                   " promotion", errors.ECODE_STATE)
5549

    
5550
    self.old_flags = old_flags = (node.master_candidate,
5551
                                  node.drained, node.offline)
5552
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5553
    self.old_role = old_role = self._F2R[old_flags]
5554

    
5555
    # Check for ineffective changes
5556
    for attr in self._FLAGS:
5557
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5558
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5559
        setattr(self.op, attr, None)
5560

    
5561
    # Past this point, any flag change to False means a transition
5562
    # away from the respective state, as only real changes are kept
5563

    
5564
    # TODO: We might query the real power state if it supports OOB
5565
    if _SupportsOob(self.cfg, node):
5566
      if self.op.offline is False and not (node.powered or
5567
                                           self.op.powered == True):
5568
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5569
                                    " offline status can be reset") %
5570
                                   self.op.node_name)
5571
    elif self.op.powered is not None:
5572
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5573
                                  " as it does not support out-of-band"
5574
                                  " handling") % self.op.node_name)
5575

    
5576
    # If we're being deofflined/drained, we'll MC ourself if needed
5577
    if (self.op.drained == False or self.op.offline == False or
5578
        (self.op.master_capable and not node.master_capable)):
5579
      if _DecideSelfPromotion(self):
5580
        self.op.master_candidate = True
5581
        self.LogInfo("Auto-promoting node to master candidate")
5582

    
5583
    # If we're no longer master capable, we'll demote ourselves from MC
5584
    if self.op.master_capable == False and node.master_candidate:
5585
      self.LogInfo("Demoting from master candidate")
5586
      self.op.master_candidate = False
5587

    
5588
    # Compute new role
5589
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5590
    if self.op.master_candidate:
5591
      new_role = self._ROLE_CANDIDATE
5592
    elif self.op.drained:
5593
      new_role = self._ROLE_DRAINED
5594
    elif self.op.offline:
5595
      new_role = self._ROLE_OFFLINE
5596
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5597
      # False is still in new flags, which means we're un-setting (the
5598
      # only) True flag
5599
      new_role = self._ROLE_REGULAR
5600
    else: # no new flags, nothing, keep old role
5601
      new_role = old_role
5602

    
5603
    self.new_role = new_role
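    # For illustration: a request of "offline=True" ends up as
    # _ROLE_OFFLINE, a request of "drained=False" on a drained node
    # (un-setting the only True flag) falls back to _ROLE_REGULAR, and a
    # request that touches none of the three flags keeps the old role.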
5604

    
5605
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5606
      # Trying to transition out of offline status
5607
      # TODO: Use standard RPC runner, but make sure it works when the node is
5608
      # still marked offline
5609
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5610
      if result.fail_msg:
5611
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5612
                                   " to report its version: %s" %
5613
                                   (node.name, result.fail_msg),
5614
                                   errors.ECODE_STATE)
5615
      else:
5616
        self.LogWarning("Transitioning node from offline to online state"
5617
                        " without using re-add. Please make sure the node"
5618
                        " is healthy!")
5619

    
5620
    if self.op.secondary_ip:
5621
      # Ok even without locking, because this can't be changed by any LU
5622
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5623
      master_singlehomed = master.secondary_ip == master.primary_ip
5624
      if master_singlehomed and self.op.secondary_ip:
5625
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5626
                                   " homed cluster", errors.ECODE_INVAL)
5627

    
5628
      assert not (frozenset(affected_instances) -
5629
                  self.owned_locks(locking.LEVEL_INSTANCE))
5630

    
5631
      if node.offline:
5632
        if affected_instances:
5633
          raise errors.OpPrereqError("Cannot change secondary IP address:"
5634
                                     " offline node has instances (%s)"
5635
                                     " configured to use it" %
5636
                                     utils.CommaJoin(affected_instances.keys()))
5637
      else:
5638
        # On online nodes, check that no instances are running, and that
5639
        # the node has the new ip and we can reach it.
5640
        for instance in affected_instances.values():
5641
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
5642
                              msg="cannot change secondary ip")
5643

    
5644
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5645
        if master.name != node.name:
5646
          # check reachability from master secondary ip to new secondary ip
5647
          if not netutils.TcpPing(self.op.secondary_ip,
5648
                                  constants.DEFAULT_NODED_PORT,
5649
                                  source=master.secondary_ip):
5650
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5651
                                       " based ping to node daemon port",
5652
                                       errors.ECODE_ENVIRON)
5653

    
5654
    if self.op.ndparams:
5655
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5656
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5657
      self.new_ndparams = new_ndparams
5658

    
5659
    if self.op.hv_state:
5660
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5661
                                                 self.node.hv_state_static)
5662

    
5663
    if self.op.disk_state:
5664
      self.new_disk_state = \
5665
        _MergeAndVerifyDiskState(self.op.disk_state,
5666
                                 self.node.disk_state_static)
5667

    
5668
  def Exec(self, feedback_fn):
5669
    """Modifies a node.
5670

5671
    """
5672
    node = self.node
5673
    old_role = self.old_role
5674
    new_role = self.new_role
5675

    
5676
    result = []
5677

    
5678
    if self.op.ndparams:
5679
      node.ndparams = self.new_ndparams
5680

    
5681
    if self.op.powered is not None:
5682
      node.powered = self.op.powered
5683

    
5684
    if self.op.hv_state:
5685
      node.hv_state_static = self.new_hv_state
5686

    
5687
    if self.op.disk_state:
5688
      node.disk_state_static = self.new_disk_state
5689

    
5690
    for attr in ["master_capable", "vm_capable"]:
5691
      val = getattr(self.op, attr)
5692
      if val is not None:
5693
        setattr(node, attr, val)
5694
        result.append((attr, str(val)))
5695

    
5696
    if new_role != old_role:
5697
      # Tell the node to demote itself, if no longer MC and not offline
5698
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5699
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5700
        if msg:
5701
          self.LogWarning("Node failed to demote itself: %s", msg)
5702

    
5703
      new_flags = self._R2F[new_role]
5704
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5705
        if of != nf:
5706
          result.append((desc, str(nf)))
5707
      (node.master_candidate, node.drained, node.offline) = new_flags
5708

    
5709
      # we locked all nodes, we adjust the CP before updating this node
5710
      if self.lock_all:
5711
        _AdjustCandidatePool(self, [node.name])
5712

    
5713
    if self.op.secondary_ip:
5714
      node.secondary_ip = self.op.secondary_ip
5715
      result.append(("secondary_ip", self.op.secondary_ip))
5716

    
5717
    # this will trigger configuration file update, if needed
5718
    self.cfg.Update(node, feedback_fn)
5719

    
5720
    # this will trigger job queue propagation or cleanup if the mc
5721
    # flag changed
5722
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5723
      self.context.ReaddNode(node)
5724

    
5725
    return result
5726

    
5727

    
5728
class LUNodePowercycle(NoHooksLU):
5729
  """Powercycles a node.
5730

5731
  """
5732
  REQ_BGL = False
5733

    
5734
  def CheckArguments(self):
5735
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5736
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5737
      raise errors.OpPrereqError("The node is the master and the force"
5738
                                 " parameter was not set",
5739
                                 errors.ECODE_INVAL)
5740

    
5741
  def ExpandNames(self):
5742
    """Locking for PowercycleNode.
5743

5744
    This is a last-resort option and shouldn't block on other
5745
    jobs. Therefore, we grab no locks.
5746

5747
    """
5748
    self.needed_locks = {}
5749

    
5750
  def Exec(self, feedback_fn):
5751
    """Reboots a node.
5752

5753
    """
5754
    result = self.rpc.call_node_powercycle(self.op.node_name,
5755
                                           self.cfg.GetHypervisorType())
5756
    result.Raise("Failed to schedule the reboot")
5757
    return result.payload
5758

    
5759

    
5760
class LUClusterQuery(NoHooksLU):
5761
  """Query cluster configuration.
5762

5763
  """
5764
  REQ_BGL = False
5765

    
5766
  def ExpandNames(self):
5767
    self.needed_locks = {}
5768

    
5769
  def Exec(self, feedback_fn):
5770
    """Return cluster config.
5771

5772
    """
5773
    cluster = self.cfg.GetClusterInfo()
5774
    os_hvp = {}
5775

    
5776
    # Filter just for enabled hypervisors
5777
    for os_name, hv_dict in cluster.os_hvp.items():
5778
      os_hvp[os_name] = {}
5779
      for hv_name, hv_params in hv_dict.items():
5780
        if hv_name in cluster.enabled_hypervisors:
5781
          os_hvp[os_name][hv_name] = hv_params
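    # e.g. an os_hvp of {"debian-image": {"kvm": {...}, "xen-pvm": {...}}}
    # (names purely illustrative) is reduced to the entries whose
    # hypervisor is actually enabled on the cluster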
5782

    
5783
    # Convert ip_family to ip_version
5784
    primary_ip_version = constants.IP4_VERSION
5785
    if cluster.primary_ip_family == netutils.IP6Address.family:
5786
      primary_ip_version = constants.IP6_VERSION
5787

    
5788
    result = {
5789
      "software_version": constants.RELEASE_VERSION,
5790
      "protocol_version": constants.PROTOCOL_VERSION,
5791
      "config_version": constants.CONFIG_VERSION,
5792
      "os_api_version": max(constants.OS_API_VERSIONS),
5793
      "export_version": constants.EXPORT_VERSION,
5794
      "architecture": (platform.architecture()[0], platform.machine()),
5795
      "name": cluster.cluster_name,
5796
      "master": cluster.master_node,
5797
      "default_hypervisor": cluster.primary_hypervisor,
5798
      "enabled_hypervisors": cluster.enabled_hypervisors,
5799
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5800
                        for hypervisor_name in cluster.enabled_hypervisors]),
5801
      "os_hvp": os_hvp,
5802
      "beparams": cluster.beparams,
5803
      "osparams": cluster.osparams,
5804
      "nicparams": cluster.nicparams,
5805
      "ndparams": cluster.ndparams,
5806
      "candidate_pool_size": cluster.candidate_pool_size,
5807
      "master_netdev": cluster.master_netdev,
5808
      "master_netmask": cluster.master_netmask,
5809
      "use_external_mip_script": cluster.use_external_mip_script,
5810
      "volume_group_name": cluster.volume_group_name,
5811
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5812
      "file_storage_dir": cluster.file_storage_dir,
5813
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5814
      "maintain_node_health": cluster.maintain_node_health,
5815
      "ctime": cluster.ctime,
5816
      "mtime": cluster.mtime,
5817
      "uuid": cluster.uuid,
5818
      "tags": list(cluster.GetTags()),
5819
      "uid_pool": cluster.uid_pool,
5820
      "default_iallocator": cluster.default_iallocator,
5821
      "reserved_lvs": cluster.reserved_lvs,
5822
      "primary_ip_version": primary_ip_version,
5823
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5824
      "hidden_os": cluster.hidden_os,
5825
      "blacklisted_os": cluster.blacklisted_os,
5826
      }
5827

    
5828
    return result
5829

    
5830

    
5831
class LUClusterConfigQuery(NoHooksLU):
5832
  """Return configuration values.
5833

5834
  """
5835
  REQ_BGL = False
5836
  _FIELDS_DYNAMIC = utils.FieldSet()
5837
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5838
                                  "watcher_pause", "volume_group_name")
5839

    
5840
  def CheckArguments(self):
5841
    _CheckOutputFields(static=self._FIELDS_STATIC,
5842
                       dynamic=self._FIELDS_DYNAMIC,
5843
                       selected=self.op.output_fields)
5844

    
5845
  def ExpandNames(self):
5846
    self.needed_locks = {}
5847

    
5848
  def Exec(self, feedback_fn):
5849
    """Dump a representation of the cluster config to the standard output.
5850

5851
    """
5852
    values = []
5853
    for field in self.op.output_fields:
5854
      if field == "cluster_name":
5855
        entry = self.cfg.GetClusterName()
5856
      elif field == "master_node":
5857
        entry = self.cfg.GetMasterNode()
5858
      elif field == "drain_flag":
5859
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5860
      elif field == "watcher_pause":
5861
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5862
      elif field == "volume_group_name":
5863
        entry = self.cfg.GetVGName()
5864
      else:
5865
        raise errors.ParameterError(field)
5866
      values.append(entry)
5867
    return values
5868

    
5869

    
5870
class LUInstanceActivateDisks(NoHooksLU):
5871
  """Bring up an instance's disks.
5872

5873
  """
5874
  REQ_BGL = False
5875

    
5876
  def ExpandNames(self):
5877
    self._ExpandAndLockInstance()
5878
    self.needed_locks[locking.LEVEL_NODE] = []
5879
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5880

    
5881
  def DeclareLocks(self, level):
5882
    if level == locking.LEVEL_NODE:
5883
      self._LockInstancesNodes()
5884

    
5885
  def CheckPrereq(self):
5886
    """Check prerequisites.
5887

5888
    This checks that the instance is in the cluster.
5889

5890
    """
5891
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5892
    assert self.instance is not None, \
5893
      "Cannot retrieve locked instance %s" % self.op.instance_name
5894
    _CheckNodeOnline(self, self.instance.primary_node)
5895

    
5896
  def Exec(self, feedback_fn):
5897
    """Activate the disks.
5898

5899
    """
5900
    disks_ok, disks_info = \
5901
              _AssembleInstanceDisks(self, self.instance,
5902
                                     ignore_size=self.op.ignore_size)
5903
    if not disks_ok:
5904
      raise errors.OpExecError("Cannot activate block devices")
5905

    
5906
    return disks_info
5907

    
5908

    
5909
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5910
                           ignore_size=False):
5911
  """Prepare the block devices for an instance.
5912

5913
  This sets up the block devices on all nodes.
5914

5915
  @type lu: L{LogicalUnit}
5916
  @param lu: the logical unit on whose behalf we execute
5917
  @type instance: L{objects.Instance}
5918
  @param instance: the instance for whose disks we assemble
5919
  @type disks: list of L{objects.Disk} or None
5920
  @param disks: which disks to assemble (or all, if None)
5921
  @type ignore_secondaries: boolean
5922
  @param ignore_secondaries: if true, errors on secondary nodes
5923
      won't result in an error return from the function
5924
  @type ignore_size: boolean
5925
  @param ignore_size: if true, the current known size of the disk
5926
      will not be used during the disk activation, useful for cases
5927
      when the size is wrong
5928
  @return: False if the operation failed, otherwise a list of
5929
      (host, instance_visible_name, node_visible_name)
5930
      with the mapping from node devices to instance devices
5931

5932
  """
5933
  device_info = []
5934
  disks_ok = True
5935
  iname = instance.name
5936
  disks = _ExpandCheckDisks(instance, disks)
5937

    
5938
  # With the two-pass mechanism we try to reduce the window of
5939
  # opportunity for the race condition of switching DRBD to primary
5940
  # before handshaking occurred, but we do not eliminate it
5941

    
5942
  # The proper fix would be to wait (with some limits) until the
5943
  # connection has been made and drbd transitions from WFConnection
5944
  # into any other network-connected state (Connected, SyncTarget,
5945
  # SyncSource, etc.)
5946

    
5947
  # 1st pass, assemble on all nodes in secondary mode
5948
  for idx, inst_disk in enumerate(disks):
5949
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5950
      if ignore_size:
5951
        node_disk = node_disk.Copy()
5952
        node_disk.UnsetSize()
5953
      lu.cfg.SetDiskID(node_disk, node)
5954
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5955
      msg = result.fail_msg
5956
      if msg:
5957
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5958
                           " (is_primary=False, pass=1): %s",
5959
                           inst_disk.iv_name, node, msg)
5960
        if not ignore_secondaries:
5961
          disks_ok = False
5962

    
5963
  # FIXME: race condition on drbd migration to primary
5964

    
5965
  # 2nd pass, do only the primary node
5966
  for idx, inst_disk in enumerate(disks):
5967
    dev_path = None
5968

    
5969
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5970
      if node != instance.primary_node:
5971
        continue
5972
      if ignore_size:
5973
        node_disk = node_disk.Copy()
5974
        node_disk.UnsetSize()
5975
      lu.cfg.SetDiskID(node_disk, node)
5976
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5977
      msg = result.fail_msg
5978
      if msg:
5979
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5980
                           " (is_primary=True, pass=2): %s",
5981
                           inst_disk.iv_name, node, msg)
5982
        disks_ok = False
5983
      else:
5984
        dev_path = result.payload
5985

    
5986
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5987

    
5988
  # leave the disks configured for the primary node
5989
  # this is a workaround that would be fixed better by
5990
  # improving the logical/physical id handling
5991
  for disk in disks:
5992
    lu.cfg.SetDiskID(disk, instance.primary_node)
5993

    
5994
  return disks_ok, device_info
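# A minimal usage sketch: callers unpack the (status, info) pair and abort
# when assembly failed on a required node, as LUInstanceActivateDisks does
# above:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance,
#                                                 ignore_size=self.op.ignore_size)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#
# The device_info entries describe only the primary node's view of each disk.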
5995

    
5996

    
5997
def _StartInstanceDisks(lu, instance, force):
5998
  """Start the disks of an instance.
5999

6000
  """
6001
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6002
                                           ignore_secondaries=force)
6003
  if not disks_ok:
6004
    _ShutdownInstanceDisks(lu, instance)
6005
    if force is not None and not force:
6006
      lu.proc.LogWarning("", hint="If the message above refers to a"
6007
                         " secondary node,"
6008
                         " you can retry the operation using '--force'.")
6009
    raise errors.OpExecError("Disk consistency error")
6010

    
6011

    
6012
class LUInstanceDeactivateDisks(NoHooksLU):
6013
  """Shutdown an instance's disks.
6014

6015
  """
6016
  REQ_BGL = False
6017

    
6018
  def ExpandNames(self):
6019
    self._ExpandAndLockInstance()
6020
    self.needed_locks[locking.LEVEL_NODE] = []
6021
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6022

    
6023
  def DeclareLocks(self, level):
6024
    if level == locking.LEVEL_NODE:
6025
      self._LockInstancesNodes()
6026

    
6027
  def CheckPrereq(self):
6028
    """Check prerequisites.
6029

6030
    This checks that the instance is in the cluster.
6031

6032
    """
6033
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6034
    assert self.instance is not None, \
6035
      "Cannot retrieve locked instance %s" % self.op.instance_name
6036

    
6037
  def Exec(self, feedback_fn):
6038
    """Deactivate the disks
6039

6040
    """
6041
    instance = self.instance
6042
    if self.op.force:
6043
      _ShutdownInstanceDisks(self, instance)
6044
    else:
6045
      _SafeShutdownInstanceDisks(self, instance)
6046

    
6047

    
6048
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6049
  """Shutdown block devices of an instance.
6050

6051
  This function checks that the instance is down before calling
6052
  _ShutdownInstanceDisks.
6053

6054
  """
6055
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6056
  _ShutdownInstanceDisks(lu, instance, disks=disks)
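# LUInstanceDeactivateDisks above uses this variant whenever the operation
# was not forced, so the disks of a still-running instance are never torn
# down by accident.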
6057

    
6058

    
6059
def _ExpandCheckDisks(instance, disks):
6060
  """Return the instance disks selected by the disks list
6061

6062
  @type disks: list of L{objects.Disk} or None
6063
  @param disks: selected disks
6064
  @rtype: list of L{objects.Disk}
6065
  @return: selected instance disks to act on
6066

6067
  """
6068
  if disks is None:
6069
    return instance.disks
6070
  else:
6071
    if not set(disks).issubset(instance.disks):
6072
      raise errors.ProgrammerError("Can only act on disks belonging to the"
6073
                                   " target instance")
6074
    return disks
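# Callers such as _AssembleInstanceDisks and _ShutdownInstanceDisks therefore
# act on every disk of the instance when disks=None, and only on the given
# subset (which must belong to the instance) otherwise.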
6075

    
6076

    
6077
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6078
  """Shutdown block devices of an instance.
6079

6080
  This does the shutdown on all nodes of the instance.
6081

6082
  If ignore_primary is false, errors on the primary node are not
6083
  ignored.
6084

6085
  """
6086
  all_result = True
6087
  disks = _ExpandCheckDisks(instance, disks)
6088

    
6089
  for disk in disks:
6090
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6091
      lu.cfg.SetDiskID(top_disk, node)
6092
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6093
      msg = result.fail_msg
6094
      if msg:
6095
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6096
                      disk.iv_name, node, msg)
6097
        if ((node == instance.primary_node and not ignore_primary) or
6098
            (node != instance.primary_node and not result.offline)):
6099
          all_result = False
6100
  return all_result
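# Illustration of the failure policy above: with ignore_primary left at
# False, a failed blockdev_shutdown on the primary node makes the function
# return False; failures on online secondary nodes also return False, while
# failures reported by offline secondaries are tolerated.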
6101

    
6102

    
6103
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6104
  """Checks if a node has enough free memory.
6105

6106
  This function check if a given node has the needed amount of free
6107
  memory. In case the node has less memory or we cannot get the
6108
  information from the node, this function raise an OpPrereqError
6109
  exception.
6110

6111
  @type lu: C{LogicalUnit}
6112
  @param lu: a logical unit from which we get configuration data
6113
  @type node: C{str}
6114
  @param node: the node to check
6115
  @type reason: C{str}
6116
  @param reason: string to use in the error message
6117
  @type requested: C{int}
6118
  @param requested: the amount of memory in MiB to check for
6119
  @type hypervisor_name: C{str}
6120
  @param hypervisor_name: the hypervisor to ask for memory stats
6121
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6122
      we cannot check the node
6123

6124
  """
6125
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6126
  nodeinfo[node].Raise("Can't get data from node %s" % node,
6127
                       prereq=True, ecode=errors.ECODE_ENVIRON)
6128
  (_, _, (hv_info, )) = nodeinfo[node].payload
6129

    
6130
  free_mem = hv_info.get("memory_free", None)
6131
  if not isinstance(free_mem, int):
6132
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6133
                               " was '%s'" % (node, free_mem),
6134
                               errors.ECODE_ENVIRON)
6135
  if requested > free_mem:
6136
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6137
                               " needed %s MiB, available %s MiB" %
6138
                               (node, reason, requested, free_mem),
6139
                               errors.ECODE_NORES)
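# A typical call, mirroring LUInstanceStartup.CheckPrereq below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MAXMEM], instance.hypervisor)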
6140

    
6141

    
6142
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6143
  """Checks if nodes have enough free disk space in the all VGs.
6144

6145
  This function checks if all given nodes have the needed amount of
6146
  free disk. In case any node has less disk or we cannot get the
6147
  information from the node, this function raises an OpPrereqError
6148
  exception.
6149

6150
  @type lu: C{LogicalUnit}
6151
  @param lu: a logical unit from which we get configuration data
6152
  @type nodenames: C{list}
6153
  @param nodenames: the list of node names to check
6154
  @type req_sizes: C{dict}
6155
  @param req_sizes: the hash of vg and corresponding amount of disk in
6156
      MiB to check for
6157
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6158
      or we cannot check the node
6159

6160
  """
6161
  for vg, req_size in req_sizes.items():
6162
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
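# An illustrative call (volume group names and sizes are made up):
#
#   _CheckNodesFreeDiskPerVG(self, [pnode, snode],
#                            {"xenvg": 10240, "ssdvg": 2048})
#
# requires 10 GiB free in "xenvg" and 2 GiB free in "ssdvg" on both nodes.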
6163

    
6164

    
6165
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6166
  """Checks if nodes have enough free disk space in the specified VG.
6167

6168
  This function checks if all given nodes have the needed amount of
6169
  free disk. In case any node has less disk or we cannot get the
6170
  information from the node, this function raises an OpPrereqError
6171
  exception.
6172

6173
  @type lu: C{LogicalUnit}
6174
  @param lu: a logical unit from which we get configuration data
6175
  @type nodenames: C{list}
6176
  @param nodenames: the list of node names to check
6177
  @type vg: C{str}
6178
  @param vg: the volume group to check
6179
  @type requested: C{int}
6180
  @param requested: the amount of disk in MiB to check for
6181
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6182
      or we cannot check the node
6183

6184
  """
6185
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6186
  for node in nodenames:
6187
    info = nodeinfo[node]
6188
    info.Raise("Cannot get current information from node %s" % node,
6189
               prereq=True, ecode=errors.ECODE_ENVIRON)
6190
    (_, (vg_info, ), _) = info.payload
6191
    vg_free = vg_info.get("vg_free", None)
6192
    if not isinstance(vg_free, int):
6193
      raise errors.OpPrereqError("Can't compute free disk space on node"
6194
                                 " %s for vg %s, result was '%s'" %
6195
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6196
    if requested > vg_free:
6197
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6198
                                 " vg %s: required %d MiB, available %d MiB" %
6199
                                 (node, vg, requested, vg_free),
6200
                                 errors.ECODE_NORES)
6201

    
6202

    
6203
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6204
  """Checks if nodes have enough physical CPUs
6205

6206
  This function checks if all given nodes have the needed number of
6207
  physical CPUs. In case any node has fewer CPUs or we cannot get the
6208
  information from the node, this function raises an OpPrereqError
6209
  exception.
6210

6211
  @type lu: C{LogicalUnit}
6212
  @param lu: a logical unit from which we get configuration data
6213
  @type nodenames: C{list}
6214
  @param nodenames: the list of node names to check
6215
  @type requested: C{int}
6216
  @param requested: the minimum acceptable number of physical CPUs
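  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to query for the CPU count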
6217
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6218
      or we cannot check the node
6219

6220
  """
6221
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6222
  for node in nodenames:
6223
    info = nodeinfo[node]
6224
    info.Raise("Cannot get current information from node %s" % node,
6225
               prereq=True, ecode=errors.ECODE_ENVIRON)
6226
    (_, _, (hv_info, )) = info.payload
6227
    num_cpus = hv_info.get("cpu_total", None)
6228
    if not isinstance(num_cpus, int):
6229
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6230
                                 " on node %s, result was '%s'" %
6231
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6232
    if requested > num_cpus:
6233
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6234
                                 "required" % (node, num_cpus, requested),
6235
                                 errors.ECODE_NORES)
6236

    
6237

    
6238
class LUInstanceStartup(LogicalUnit):
6239
  """Starts an instance.
6240

6241
  """
6242
  HPATH = "instance-start"
6243
  HTYPE = constants.HTYPE_INSTANCE
6244
  REQ_BGL = False
6245

    
6246
  def CheckArguments(self):
6247
    # extra beparams
6248
    if self.op.beparams:
6249
      # fill the beparams dict
6250
      objects.UpgradeBeParams(self.op.beparams)
6251
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6252

    
6253
  def ExpandNames(self):
6254
    self._ExpandAndLockInstance()
6255

    
6256
  def BuildHooksEnv(self):
6257
    """Build hooks env.
6258

6259
    This runs on master, primary and secondary nodes of the instance.
6260

6261
    """
6262
    env = {
6263
      "FORCE": self.op.force,
6264
      }
6265

    
6266
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6267

    
6268
    return env
6269

    
6270
  def BuildHooksNodes(self):
6271
    """Build hooks nodes.
6272

6273
    """
6274
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6275
    return (nl, nl)
6276

    
6277
  def CheckPrereq(self):
6278
    """Check prerequisites.
6279

6280
    This checks that the instance is in the cluster.
6281

6282
    """
6283
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6284
    assert self.instance is not None, \
6285
      "Cannot retrieve locked instance %s" % self.op.instance_name
6286

    
6287
    # extra hvparams
6288
    if self.op.hvparams:
6289
      # check hypervisor parameter syntax (locally)
6290
      cluster = self.cfg.GetClusterInfo()
6291
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6292
      filled_hvp = cluster.FillHV(instance)
6293
      filled_hvp.update(self.op.hvparams)
6294
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6295
      hv_type.CheckParameterSyntax(filled_hvp)
6296
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6297

    
6298
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6299

    
6300
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6301

    
6302
    if self.primary_offline and self.op.ignore_offline_nodes:
6303
      self.proc.LogWarning("Ignoring offline primary node")
6304

    
6305
      if self.op.hvparams or self.op.beparams:
6306
        self.proc.LogWarning("Overridden parameters are ignored")
6307
    else:
6308
      _CheckNodeOnline(self, instance.primary_node)
6309

    
6310
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6311

    
6312
      # check bridges existence
6313
      _CheckInstanceBridgesExist(self, instance)
6314

    
6315
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6316
                                                instance.name,
6317
                                                instance.hypervisor)
6318
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6319
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6320
      if not remote_info.payload: # not running already
6321
        _CheckNodeFreeMemory(self, instance.primary_node,
6322
                             "starting instance %s" % instance.name,
6323
                             bep[constants.BE_MAXMEM], instance.hypervisor)
6324

    
6325
  def Exec(self, feedback_fn):
6326
    """Start the instance.
6327

6328
    """
6329
    instance = self.instance
6330
    force = self.op.force
6331

    
6332
    if not self.op.no_remember:
6333
      self.cfg.MarkInstanceUp(instance.name)
6334

    
6335
    if self.primary_offline:
6336
      assert self.op.ignore_offline_nodes
6337
      self.proc.LogInfo("Primary node offline, marked instance as started")
6338
    else:
6339
      node_current = instance.primary_node
6340

    
6341
      _StartInstanceDisks(self, instance, force)
6342

    
6343
      result = \
6344
        self.rpc.call_instance_start(node_current,
6345
                                     (instance, self.op.hvparams,
6346
                                      self.op.beparams),
6347
                                     self.op.startup_paused)
6348
      msg = result.fail_msg
6349
      if msg:
6350
        _ShutdownInstanceDisks(self, instance)
6351
        raise errors.OpExecError("Could not start instance: %s" % msg)
6352

    
6353

    
6354
class LUInstanceReboot(LogicalUnit):
6355
  """Reboot an instance.
6356

6357
  """
6358
  HPATH = "instance-reboot"
6359
  HTYPE = constants.HTYPE_INSTANCE
6360
  REQ_BGL = False
6361

    
6362
  def ExpandNames(self):
6363
    self._ExpandAndLockInstance()
6364

    
6365
  def BuildHooksEnv(self):
6366
    """Build hooks env.
6367

6368
    This runs on master, primary and secondary nodes of the instance.
6369

6370
    """
6371
    env = {
6372
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6373
      "REBOOT_TYPE": self.op.reboot_type,
6374
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6375
      }
6376

    
6377
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6378

    
6379
    return env
6380

    
6381
  def BuildHooksNodes(self):
6382
    """Build hooks nodes.
6383

6384
    """
6385
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6386
    return (nl, nl)
6387

    
6388
  def CheckPrereq(self):
6389
    """Check prerequisites.
6390

6391
    This checks that the instance is in the cluster.
6392

6393
    """
6394
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6395
    assert self.instance is not None, \
6396
      "Cannot retrieve locked instance %s" % self.op.instance_name
6397
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6398
    _CheckNodeOnline(self, instance.primary_node)
6399

    
6400
    # check bridges existence
6401
    _CheckInstanceBridgesExist(self, instance)
6402

    
6403
  def Exec(self, feedback_fn):
6404
    """Reboot the instance.
6405

6406
    """
6407
    instance = self.instance
6408
    ignore_secondaries = self.op.ignore_secondaries
6409
    reboot_type = self.op.reboot_type
6410

    
6411
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6412
                                              instance.name,
6413
                                              instance.hypervisor)
6414
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6415
    instance_running = bool(remote_info.payload)
6416

    
6417
    node_current = instance.primary_node
6418

    
6419
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6420
                                            constants.INSTANCE_REBOOT_HARD]:
6421
      for disk in instance.disks:
6422
        self.cfg.SetDiskID(disk, node_current)
6423
      result = self.rpc.call_instance_reboot(node_current, instance,
6424
                                             reboot_type,
6425
                                             self.op.shutdown_timeout)
6426
      result.Raise("Could not reboot instance")
6427
    else:
6428
      if instance_running:
6429
        result = self.rpc.call_instance_shutdown(node_current, instance,
6430
                                                 self.op.shutdown_timeout)
6431
        result.Raise("Could not shutdown instance for full reboot")
6432
        _ShutdownInstanceDisks(self, instance)
6433
      else:
6434
        self.LogInfo("Instance %s was already stopped, starting now",
6435
                     instance.name)
6436
      _StartInstanceDisks(self, instance, ignore_secondaries)
6437
      result = self.rpc.call_instance_start(node_current,
6438
                                            (instance, None, None), False)
6439
      msg = result.fail_msg
6440
      if msg:
6441
        _ShutdownInstanceDisks(self, instance)
6442
        raise errors.OpExecError("Could not start instance for"
6443
                                 " full reboot: %s" % msg)
6444

    
6445
    self.cfg.MarkInstanceUp(instance.name)
6446

    
6447

    
6448
class LUInstanceShutdown(LogicalUnit):
6449
  """Shutdown an instance.
6450

6451
  """
6452
  HPATH = "instance-stop"
6453
  HTYPE = constants.HTYPE_INSTANCE
6454
  REQ_BGL = False
6455

    
6456
  def ExpandNames(self):
6457
    self._ExpandAndLockInstance()
6458

    
6459
  def BuildHooksEnv(self):
6460
    """Build hooks env.
6461

6462
    This runs on master, primary and secondary nodes of the instance.
6463

6464
    """
6465
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6466
    env["TIMEOUT"] = self.op.timeout
6467
    return env
6468

    
6469
  def BuildHooksNodes(self):
6470
    """Build hooks nodes.
6471

6472
    """
6473
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6474
    return (nl, nl)
6475

    
6476
  def CheckPrereq(self):
6477
    """Check prerequisites.
6478

6479
    This checks that the instance is in the cluster.
6480

6481
    """
6482
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6483
    assert self.instance is not None, \
6484
      "Cannot retrieve locked instance %s" % self.op.instance_name
6485

    
6486
    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6487

    
6488
    self.primary_offline = \
6489
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6490

    
6491
    if self.primary_offline and self.op.ignore_offline_nodes:
6492
      self.proc.LogWarning("Ignoring offline primary node")
6493
    else:
6494
      _CheckNodeOnline(self, self.instance.primary_node)
6495

    
6496
  def Exec(self, feedback_fn):
6497
    """Shutdown the instance.
6498

6499
    """
6500
    instance = self.instance
6501
    node_current = instance.primary_node
6502
    timeout = self.op.timeout
6503

    
6504
    if not self.op.no_remember:
6505
      self.cfg.MarkInstanceDown(instance.name)
6506

    
6507
    if self.primary_offline:
6508
      assert self.op.ignore_offline_nodes
6509
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6510
    else:
6511
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6512
      msg = result.fail_msg
6513
      if msg:
6514
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6515

    
6516
      _ShutdownInstanceDisks(self, instance)
6517

    
6518

    
6519
class LUInstanceReinstall(LogicalUnit):
6520
  """Reinstall an instance.
6521

6522
  """
6523
  HPATH = "instance-reinstall"
6524
  HTYPE = constants.HTYPE_INSTANCE
6525
  REQ_BGL = False
6526

    
6527
  def ExpandNames(self):
6528
    self._ExpandAndLockInstance()
6529

    
6530
  def BuildHooksEnv(self):
6531
    """Build hooks env.
6532

6533
    This runs on master, primary and secondary nodes of the instance.
6534

6535
    """
6536
    return _BuildInstanceHookEnvByObject(self, self.instance)
6537

    
6538
  def BuildHooksNodes(self):
6539
    """Build hooks nodes.
6540

6541
    """
6542
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6543
    return (nl, nl)
6544

    
6545
  def CheckPrereq(self):
6546
    """Check prerequisites.
6547

6548
    This checks that the instance is in the cluster and is not running.
6549

6550
    """
6551
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6552
    assert instance is not None, \
6553
      "Cannot retrieve locked instance %s" % self.op.instance_name
6554
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6555
                     " offline, cannot reinstall")
6556
    for node in instance.secondary_nodes:
6557
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6558
                       " cannot reinstall")
6559

    
6560
    if instance.disk_template == constants.DT_DISKLESS:
6561
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6562
                                 self.op.instance_name,
6563
                                 errors.ECODE_INVAL)
6564
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6565

    
6566
    if self.op.os_type is not None:
6567
      # OS verification
6568
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6569
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6570
      instance_os = self.op.os_type
6571
    else:
6572
      instance_os = instance.os
6573

    
6574
    nodelist = list(instance.all_nodes)
6575

    
6576
    if self.op.osparams:
6577
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6578
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6579
      self.os_inst = i_osdict # the new dict (without defaults)
6580
    else:
6581
      self.os_inst = None
6582

    
6583
    self.instance = instance
6584

    
6585
  def Exec(self, feedback_fn):
6586
    """Reinstall the instance.
6587

6588
    """
6589
    inst = self.instance
6590

    
6591
    if self.op.os_type is not None:
6592
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6593
      inst.os = self.op.os_type
6594
      # Write to configuration
6595
      self.cfg.Update(inst, feedback_fn)
6596

    
6597
    _StartInstanceDisks(self, inst, None)
6598
    try:
6599
      feedback_fn("Running the instance OS create scripts...")
6600
      # FIXME: pass debug option from opcode to backend
6601
      result = self.rpc.call_instance_os_add(inst.primary_node,
6602
                                             (inst, self.os_inst), True,
6603
                                             self.op.debug_level)
6604
      result.Raise("Could not install OS for instance %s on node %s" %
6605
                   (inst.name, inst.primary_node))
6606
    finally:
6607
      _ShutdownInstanceDisks(self, inst)
6608

    
6609

    
6610
class LUInstanceRecreateDisks(LogicalUnit):
6611
  """Recreate an instance's missing disks.
6612

6613
  """
6614
  HPATH = "instance-recreate-disks"
6615
  HTYPE = constants.HTYPE_INSTANCE
6616
  REQ_BGL = False
6617

    
6618
  def CheckArguments(self):
6619
    # normalise the disk list
6620
    self.op.disks = sorted(frozenset(self.op.disks))
6621

    
6622
  def ExpandNames(self):
6623
    self._ExpandAndLockInstance()
6624
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6625
    if self.op.nodes:
6626
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6627
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6628
    else:
6629
      self.needed_locks[locking.LEVEL_NODE] = []
6630

    
6631
  def DeclareLocks(self, level):
6632
    if level == locking.LEVEL_NODE:
6633
      # if we replace the nodes, we only need to lock the old primary,
6634
      # otherwise we need to lock all nodes for disk re-creation
6635
      primary_only = bool(self.op.nodes)
6636
      self._LockInstancesNodes(primary_only=primary_only)
6637
    elif level == locking.LEVEL_NODE_RES:
6638
      # Copy node locks
6639
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6640
        self.needed_locks[locking.LEVEL_NODE][:]
6641

    
6642
  def BuildHooksEnv(self):
6643
    """Build hooks env.
6644

6645
    This runs on master, primary and secondary nodes of the instance.
6646

6647
    """
6648
    return _BuildInstanceHookEnvByObject(self, self.instance)
6649

    
6650
  def BuildHooksNodes(self):
6651
    """Build hooks nodes.
6652

6653
    """
6654
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6655
    return (nl, nl)
6656

    
6657
  def CheckPrereq(self):
6658
    """Check prerequisites.
6659

6660
    This checks that the instance is in the cluster and is not running.
6661

6662
    """
6663
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6664
    assert instance is not None, \
6665
      "Cannot retrieve locked instance %s" % self.op.instance_name
6666
    if self.op.nodes:
6667
      if len(self.op.nodes) != len(instance.all_nodes):
6668
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6669
                                   " %d replacement nodes were specified" %
6670
                                   (instance.name, len(instance.all_nodes),
6671
                                    len(self.op.nodes)),
6672
                                   errors.ECODE_INVAL)
6673
      assert instance.disk_template != constants.DT_DRBD8 or \
6674
          len(self.op.nodes) == 2
6675
      assert instance.disk_template != constants.DT_PLAIN or \
6676
          len(self.op.nodes) == 1
6677
      primary_node = self.op.nodes[0]
6678
    else:
6679
      primary_node = instance.primary_node
6680
    _CheckNodeOnline(self, primary_node)
6681

    
6682
    if instance.disk_template == constants.DT_DISKLESS:
6683
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6684
                                 self.op.instance_name, errors.ECODE_INVAL)
6685
    # if we replace nodes *and* the old primary is offline, we don't
6686
    # check
6687
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6688
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6689
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6690
    if not (self.op.nodes and old_pnode.offline):
6691
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6692
                          msg="cannot recreate disks")
6693

    
6694
    if not self.op.disks:
6695
      self.op.disks = range(len(instance.disks))
6696
    else:
6697
      for idx in self.op.disks:
6698
        if idx >= len(instance.disks):
6699
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6700
                                     errors.ECODE_INVAL)
6701
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6702
      raise errors.OpPrereqError("Can't recreate disks partially and"
6703
                                 " change the nodes at the same time",
6704
                                 errors.ECODE_INVAL)
6705
    self.instance = instance
6706

    
6707
  def Exec(self, feedback_fn):
6708
    """Recreate the disks.
6709

6710
    """
6711
    instance = self.instance
6712

    
6713
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6714
            self.owned_locks(locking.LEVEL_NODE_RES))
6715

    
6716
    to_skip = []
6717
    mods = [] # keeps track of needed logical_id changes
6718

    
6719
    for idx, disk in enumerate(instance.disks):
6720
      if idx not in self.op.disks: # disk idx has not been passed in
6721
        to_skip.append(idx)
6722
        continue
6723
      # update secondaries for disks, if needed
6724
      if self.op.nodes:
6725
        if disk.dev_type == constants.LD_DRBD8:
6726
          # need to update the nodes and minors
6727
          assert len(self.op.nodes) == 2
6728
          assert len(disk.logical_id) == 6 # otherwise disk internals
6729
                                           # have changed
6730
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6731
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6732
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6733
                    new_minors[0], new_minors[1], old_secret)
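          # the DRBD logical_id keeps its TCP port and shared secret but is
          # re-pointed at the two replacement nodes and their freshly
          # allocated minors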
6734
          assert len(disk.logical_id) == len(new_id)
6735
          mods.append((idx, new_id))
6736

    
6737
    # now that we have passed all asserts above, we can apply the mods
6738
    # in a single run (to avoid partial changes)
6739
    for idx, new_id in mods:
6740
      instance.disks[idx].logical_id = new_id
6741

    
6742
    # change primary node, if needed
6743
    if self.op.nodes:
6744
      instance.primary_node = self.op.nodes[0]
6745
      self.LogWarning("Changing the instance's nodes, you will have to"
6746
                      " remove any disks left on the older nodes manually")
6747

    
6748
    if self.op.nodes:
6749
      self.cfg.Update(instance, feedback_fn)
6750

    
6751
    _CreateDisks(self, instance, to_skip=to_skip)
6752

    
6753

    
6754
class LUInstanceRename(LogicalUnit):
6755
  """Rename an instance.
6756

6757
  """
6758
  HPATH = "instance-rename"
6759
  HTYPE = constants.HTYPE_INSTANCE
6760

    
6761
  def CheckArguments(self):
6762
    """Check arguments.
6763

6764
    """
6765
    if self.op.ip_check and not self.op.name_check:
6766
      # TODO: make the ip check more flexible and not depend on the name check
6767
      raise errors.OpPrereqError("IP address check requires a name check",
6768
                                 errors.ECODE_INVAL)
6769

    
6770
  def BuildHooksEnv(self):
6771
    """Build hooks env.
6772

6773
    This runs on master, primary and secondary nodes of the instance.
6774

6775
    """
6776
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6777
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6778
    return env
6779

    
6780
  def BuildHooksNodes(self):
6781
    """Build hooks nodes.
6782

6783
    """
6784
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6785
    return (nl, nl)
6786

    
6787
  def CheckPrereq(self):
6788
    """Check prerequisites.
6789

6790
    This checks that the instance is in the cluster and is not running.
6791

6792
    """
6793
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6794
                                                self.op.instance_name)
6795
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6796
    assert instance is not None
6797
    _CheckNodeOnline(self, instance.primary_node)
6798
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6799
                        msg="cannot rename")
6800
    self.instance = instance
6801

    
6802
    new_name = self.op.new_name
6803
    if self.op.name_check:
6804
      hostname = netutils.GetHostname(name=new_name)
6805
      if hostname.name != new_name:
6806
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6807
                     hostname.name)
6808
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6809
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6810
                                    " same as given hostname '%s'") %
6811
                                    (hostname.name, self.op.new_name),
6812
                                    errors.ECODE_INVAL)
6813
      new_name = self.op.new_name = hostname.name
6814
      if (self.op.ip_check and
6815
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6816
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6817
                                   (hostname.ip, new_name),
6818
                                   errors.ECODE_NOTUNIQUE)
6819

    
6820
    instance_list = self.cfg.GetInstanceList()
6821
    if new_name in instance_list and new_name != instance.name:
6822
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6823
                                 new_name, errors.ECODE_EXISTS)
6824

    
6825
  def Exec(self, feedback_fn):
6826
    """Rename the instance.
6827

6828
    """
6829
    inst = self.instance
6830
    old_name = inst.name
6831

    
6832
    rename_file_storage = False
6833
    if (inst.disk_template in constants.DTS_FILEBASED and
6834
        self.op.new_name != inst.name):
6835
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6836
      rename_file_storage = True
6837

    
6838
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6839
    # Change the instance lock. This is definitely safe while we hold the BGL.
6840
    # Otherwise the new lock would have to be added in acquired mode.
6841
    assert self.REQ_BGL
6842
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6843
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6844

    
6845
    # re-read the instance from the configuration after rename
6846
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6847

    
6848
    if rename_file_storage:
6849
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6850
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6851
                                                     old_file_storage_dir,
6852
                                                     new_file_storage_dir)
6853
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6854
                   " (but the instance has been renamed in Ganeti)" %
6855
                   (inst.primary_node, old_file_storage_dir,
6856
                    new_file_storage_dir))
6857

    
6858
    _StartInstanceDisks(self, inst, None)
6859
    try:
6860
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6861
                                                 old_name, self.op.debug_level)
6862
      msg = result.fail_msg
6863
      if msg:
6864
        msg = ("Could not run OS rename script for instance %s on node %s"
6865
               " (but the instance has been renamed in Ganeti): %s" %
6866
               (inst.name, inst.primary_node, msg))
6867
        self.proc.LogWarning(msg)
6868
    finally:
6869
      _ShutdownInstanceDisks(self, inst)
6870

    
6871
    return inst.name
6872

    
6873

    
6874
class LUInstanceRemove(LogicalUnit):
6875
  """Remove an instance.
6876

6877
  """
6878
  HPATH = "instance-remove"
6879
  HTYPE = constants.HTYPE_INSTANCE
6880
  REQ_BGL = False
6881

    
6882
  def ExpandNames(self):
6883
    self._ExpandAndLockInstance()
6884
    self.needed_locks[locking.LEVEL_NODE] = []
6885
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6886
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6887

    
6888
  def DeclareLocks(self, level):
6889
    if level == locking.LEVEL_NODE:
6890
      self._LockInstancesNodes()
6891
    elif level == locking.LEVEL_NODE_RES:
6892
      # Copy node locks
6893
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6894
        self.needed_locks[locking.LEVEL_NODE][:]
6895

    
6896
  def BuildHooksEnv(self):
6897
    """Build hooks env.
6898

6899
    This runs on master, primary and secondary nodes of the instance.
6900

6901
    """
6902
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6903
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6904
    return env
6905

    
6906
  def BuildHooksNodes(self):
6907
    """Build hooks nodes.
6908

6909
    """
6910
    nl = [self.cfg.GetMasterNode()]
6911
    nl_post = list(self.instance.all_nodes) + nl
6912
    return (nl, nl_post)
6913

    
6914
  def CheckPrereq(self):
6915
    """Check prerequisites.
6916

6917
    This checks that the instance is in the cluster.
6918

6919
    """
6920
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6921
    assert self.instance is not None, \
6922
      "Cannot retrieve locked instance %s" % self.op.instance_name
6923

    
6924
  def Exec(self, feedback_fn):
6925
    """Remove the instance.
6926

6927
    """
6928
    instance = self.instance
6929
    logging.info("Shutting down instance %s on node %s",
6930
                 instance.name, instance.primary_node)
6931

    
6932
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6933
                                             self.op.shutdown_timeout)
6934
    msg = result.fail_msg
6935
    if msg:
6936
      if self.op.ignore_failures:
6937
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6938
      else:
6939
        raise errors.OpExecError("Could not shutdown instance %s on"
6940
                                 " node %s: %s" %
6941
                                 (instance.name, instance.primary_node, msg))
6942

    
6943
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6944
            self.owned_locks(locking.LEVEL_NODE_RES))
6945
    assert not (set(instance.all_nodes) -
6946
                self.owned_locks(locking.LEVEL_NODE)), \
6947
      "Not owning correct locks"
6948

    
6949
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6950

    
6951

    
6952
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6953
  """Utility function to remove an instance.
6954

6955
  """
6956
  logging.info("Removing block devices for instance %s", instance.name)
6957

    
6958
  if not _RemoveDisks(lu, instance):
6959
    if not ignore_failures:
6960
      raise errors.OpExecError("Can't remove instance's disks")
6961
    feedback_fn("Warning: can't remove instance's disks")
6962

    
6963
  logging.info("Removing instance %s out of cluster config", instance.name)
6964

    
6965
  lu.cfg.RemoveInstance(instance.name)
6966

    
6967
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6968
    "Instance lock removal conflict"
6969

    
6970
  # Remove lock for the instance
6971
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6972

    
6973

    
6974
class LUInstanceQuery(NoHooksLU):
6975
  """Logical unit for querying instances.
6976

6977
  """
6978
  # pylint: disable=W0142
6979
  REQ_BGL = False
6980

    
6981
  def CheckArguments(self):
6982
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6983
                             self.op.output_fields, self.op.use_locking)
6984

    
6985
  def ExpandNames(self):
6986
    self.iq.ExpandNames(self)
6987

    
6988
  def DeclareLocks(self, level):
6989
    self.iq.DeclareLocks(self, level)
6990

    
6991
  def Exec(self, feedback_fn):
6992
    return self.iq.OldStyleQuery(self)
6993

    
6994

    
6995
class LUInstanceFailover(LogicalUnit):
6996
  """Failover an instance.
6997

6998
  """
6999
  HPATH = "instance-failover"
7000
  HTYPE = constants.HTYPE_INSTANCE
7001
  REQ_BGL = False
7002

    
7003
  def CheckArguments(self):
7004
    """Check the arguments.
7005

7006
    """
7007
    self.iallocator = getattr(self.op, "iallocator", None)
7008
    self.target_node = getattr(self.op, "target_node", None)
7009

    
7010
  def ExpandNames(self):
7011
    self._ExpandAndLockInstance()
7012

    
7013
    if self.op.target_node is not None:
7014
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7015

    
7016
    self.needed_locks[locking.LEVEL_NODE] = []
7017
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7018

    
7019
    ignore_consistency = self.op.ignore_consistency
7020
    shutdown_timeout = self.op.shutdown_timeout
7021
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
7022
                                       cleanup=False,
7023
                                       failover=True,
7024
                                       ignore_consistency=ignore_consistency,
7025
                                       shutdown_timeout=shutdown_timeout)
7026
    self.tasklets = [self._migrater]
7027

    
7028
  def DeclareLocks(self, level):
7029
    if level == locking.LEVEL_NODE:
7030
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7031
      if instance.disk_template in constants.DTS_EXT_MIRROR:
7032
        if self.op.target_node is None:
7033
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7034
        else:
7035
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7036
                                                   self.op.target_node]
7037
        del self.recalculate_locks[locking.LEVEL_NODE]
7038
      else:
7039
        self._LockInstancesNodes()
7040

    
7041
  def BuildHooksEnv(self):
7042
    """Build hooks env.
7043

7044
    This runs on master, primary and secondary nodes of the instance.
7045

7046
    """
7047
    instance = self._migrater.instance
7048
    source_node = instance.primary_node
7049
    target_node = self.op.target_node
7050
    env = {
7051
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7052
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7053
      "OLD_PRIMARY": source_node,
7054
      "NEW_PRIMARY": target_node,
7055
      }
7056

    
7057
    if instance.disk_template in constants.DTS_INT_MIRROR:
7058
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7059
      env["NEW_SECONDARY"] = source_node
7060
    else:
7061
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7062

    
7063
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7064

    
7065
    return env
7066

    
7067
  def BuildHooksNodes(self):
7068
    """Build hooks nodes.
7069

7070
    """
7071
    instance = self._migrater.instance
7072
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7073
    return (nl, nl + [instance.primary_node])
7074

    
7075

    
7076
class LUInstanceMigrate(LogicalUnit):
7077
  """Migrate an instance.
7078

7079
  This is migration without shutting down, compared to the failover,
7080
  which is done with shutdown.
7081

7082
  """
7083
  HPATH = "instance-migrate"
7084
  HTYPE = constants.HTYPE_INSTANCE
7085
  REQ_BGL = False
7086

    
7087
  def ExpandNames(self):
7088
    self._ExpandAndLockInstance()
7089

    
7090
    if self.op.target_node is not None:
7091
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7092

    
7093
    self.needed_locks[locking.LEVEL_NODE] = []
7094
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7095

    
7096
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
7097
                                       cleanup=self.op.cleanup,
7098
                                       failover=False,
7099
                                       fallback=self.op.allow_failover)
7100
    self.tasklets = [self._migrater]
7101

    
7102
  def DeclareLocks(self, level):
7103
    if level == locking.LEVEL_NODE:
7104
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7105
      if instance.disk_template in constants.DTS_EXT_MIRROR:
7106
        if self.op.target_node is None:
7107
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7108
        else:
7109
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7110
                                                   self.op.target_node]
7111
        del self.recalculate_locks[locking.LEVEL_NODE]
7112
      else:
7113
        self._LockInstancesNodes()
7114

    
7115
  def BuildHooksEnv(self):
7116
    """Build hooks env.
7117

7118
    This runs on master, primary and secondary nodes of the instance.
7119

7120
    """
7121
    instance = self._migrater.instance
7122
    source_node = instance.primary_node
7123
    target_node = self.op.target_node
7124
    env = _BuildInstanceHookEnvByObject(self, instance)
7125
    env.update({
7126
      "MIGRATE_LIVE": self._migrater.live,
7127
      "MIGRATE_CLEANUP": self.op.cleanup,
7128
      "OLD_PRIMARY": source_node,
7129
      "NEW_PRIMARY": target_node,
7130
      })
7131

    
7132
    if instance.disk_template in constants.DTS_INT_MIRROR:
7133
      env["OLD_SECONDARY"] = target_node
7134
      env["NEW_SECONDARY"] = source_node
7135
    else:
7136
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7137

    
7138
    return env
7139

    
7140
  def BuildHooksNodes(self):
7141
    """Build hooks nodes.
7142

7143
    """
7144
    instance = self._migrater.instance
7145
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7146
    return (nl, nl + [instance.primary_node])
7147

    
7148

    
7149
class LUInstanceMove(LogicalUnit):
7150
  """Move an instance by data-copying.
7151

7152
  """
7153
  HPATH = "instance-move"
7154
  HTYPE = constants.HTYPE_INSTANCE
7155
  REQ_BGL = False
7156

    
7157
  def ExpandNames(self):
7158
    self._ExpandAndLockInstance()
7159
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7160
    self.op.target_node = target_node
7161
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
7162
    self.needed_locks[locking.LEVEL_NODE_RES] = []
7163
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7164

    
7165
  def DeclareLocks(self, level):
7166
    if level == locking.LEVEL_NODE:
7167
      self._LockInstancesNodes(primary_only=True)
7168
    elif level == locking.LEVEL_NODE_RES:
7169
      # Copy node locks
7170
      self.needed_locks[locking.LEVEL_NODE_RES] = \
7171
        self.needed_locks[locking.LEVEL_NODE][:]
7172

    
7173
  def BuildHooksEnv(self):
7174
    """Build hooks env.
7175

7176
    This runs on master, primary and target nodes of the instance.
7177

7178
    """
7179
    env = {
7180
      "TARGET_NODE": self.op.target_node,
7181
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7182
      }
7183
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7184
    return env
7185

    
7186
  def BuildHooksNodes(self):
7187
    """Build hooks nodes.
7188

7189
    """
7190
    nl = [
7191
      self.cfg.GetMasterNode(),
7192
      self.instance.primary_node,
7193
      self.op.target_node,
7194
      ]
7195
    return (nl, nl)
7196

    
7197
  def CheckPrereq(self):
7198
    """Check prerequisites.
7199

7200
    This checks that the instance is in the cluster.
7201

7202
    """
7203
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7204
    assert self.instance is not None, \
7205
      "Cannot retrieve locked instance %s" % self.op.instance_name
7206

    
7207
    node = self.cfg.GetNodeInfo(self.op.target_node)
7208
    assert node is not None, \
7209
      "Cannot retrieve locked node %s" % self.op.target_node
7210

    
7211
    self.target_node = target_node = node.name
7212

    
7213
    if target_node == instance.primary_node:
7214
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7215
                                 (instance.name, target_node),
7216
                                 errors.ECODE_STATE)
7217

    
7218
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7219

    
7220
    for idx, dsk in enumerate(instance.disks):
7221
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7222
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7223
                                   " cannot copy" % idx, errors.ECODE_STATE)
7224

    
7225
    _CheckNodeOnline(self, target_node)
7226
    _CheckNodeNotDrained(self, target_node)
7227
    _CheckNodeVmCapable(self, target_node)
7228

    
7229
    if instance.admin_state == constants.ADMINST_UP:
7230
      # check memory requirements on the target node
7231
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
7232
                           instance.name, bep[constants.BE_MAXMEM],
7233
                           instance.hypervisor)
7234
    else:
7235
      self.LogInfo("Not checking memory on the secondary node as"
7236
                   " instance will not be started")
7237

    
7238
    # check bridge existence
7239
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7240

    
7241
  def Exec(self, feedback_fn):
7242
    """Move an instance.
7243

7244
    The move is done by shutting it down on its present node, copying
7245
    the data over (slow) and starting it on the new node.
7246

7247
    """
7248
    instance = self.instance
7249

    
7250
    source_node = instance.primary_node
7251
    target_node = self.target_node
7252

    
7253
    self.LogInfo("Shutting down instance %s on source node %s",
7254
                 instance.name, source_node)
7255

    
7256
    assert (self.owned_locks(locking.LEVEL_NODE) ==
7257
            self.owned_locks(locking.LEVEL_NODE_RES))
7258

    
7259
    result = self.rpc.call_instance_shutdown(source_node, instance,
7260
                                             self.op.shutdown_timeout)
7261
    msg = result.fail_msg
7262
    if msg:
7263
      if self.op.ignore_consistency:
7264
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7265
                             " Proceeding anyway. Please make sure node"
7266
                             " %s is down. Error details: %s",
7267
                             instance.name, source_node, source_node, msg)
7268
      else:
7269
        raise errors.OpExecError("Could not shutdown instance %s on"
7270
                                 " node %s: %s" %
7271
                                 (instance.name, source_node, msg))
7272

    
7273
    # create the target disks
7274
    try:
7275
      _CreateDisks(self, instance, target_node=target_node)
7276
    except errors.OpExecError:
7277
      self.LogWarning("Device creation failed, reverting...")
7278
      try:
7279
        _RemoveDisks(self, instance, target_node=target_node)
7280
      finally:
7281
        self.cfg.ReleaseDRBDMinors(instance.name)
7282
        raise
7283

    
7284
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7285

    
7286
    errs = []
7287
    # activate, get path, copy the data over
7288
    for idx, disk in enumerate(instance.disks):
7289
      self.LogInfo("Copying data for disk %d", idx)
7290
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7291
                                               instance.name, True, idx)
7292
      if result.fail_msg:
7293
        self.LogWarning("Can't assemble newly created disk %d: %s",
7294
                        idx, result.fail_msg)
7295
        errs.append(result.fail_msg)
7296
        break
7297
      dev_path = result.payload
7298
      result = self.rpc.call_blockdev_export(source_node, disk,
7299
                                             target_node, dev_path,
7300
                                             cluster_name)
7301
      if result.fail_msg:
7302
        self.LogWarning("Can't copy data over for disk %d: %s",
7303
                        idx, result.fail_msg)
7304
        errs.append(result.fail_msg)
7305
        break
7306

    
7307
    if errs:
7308
      self.LogWarning("Some disks failed to copy, aborting")
7309
      try:
7310
        _RemoveDisks(self, instance, target_node=target_node)
7311
      finally:
7312
        self.cfg.ReleaseDRBDMinors(instance.name)
7313
        raise errors.OpExecError("Errors during disk copy: %s" %
7314
                                 (",".join(errs),))
7315

    
7316
    instance.primary_node = target_node
7317
    self.cfg.Update(instance, feedback_fn)
7318

    
7319
    self.LogInfo("Removing the disks on the original node")
7320
    _RemoveDisks(self, instance, target_node=source_node)
7321

    
7322
    # Only start the instance if it's marked as up
7323
    if instance.admin_state == constants.ADMINST_UP:
7324
      self.LogInfo("Starting instance %s on node %s",
7325
                   instance.name, target_node)
7326

    
7327
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7328
                                           ignore_secondaries=True)
7329
      if not disks_ok:
7330
        _ShutdownInstanceDisks(self, instance)
7331
        raise errors.OpExecError("Can't activate the instance's disks")
7332

    
7333
      result = self.rpc.call_instance_start(target_node,
7334
                                            (instance, None, None), False)
7335
      msg = result.fail_msg
7336
      if msg:
7337
        _ShutdownInstanceDisks(self, instance)
7338
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7339
                                 (instance.name, target_node, msg))
7340

    
7341

    
7342
class LUNodeMigrate(LogicalUnit):
7343
  """Migrate all instances from a node.
7344

7345
  """
7346
  HPATH = "node-migrate"
7347
  HTYPE = constants.HTYPE_NODE
7348
  REQ_BGL = False
7349

    
7350
  def CheckArguments(self):
7351
    pass
7352

    
7353
  def ExpandNames(self):
7354
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7355

    
7356
    self.share_locks = _ShareAll()
7357
    self.needed_locks = {
7358
      locking.LEVEL_NODE: [self.op.node_name],
7359
      }
7360

    
7361
  def BuildHooksEnv(self):
7362
    """Build hooks env.
7363

7364
    This runs on the master only.
7365

7366
    """
7367
    return {
7368
      "NODE_NAME": self.op.node_name,
7369
      }
7370

    
7371
  def BuildHooksNodes(self):
7372
    """Build hooks nodes.
7373

7374
    """
7375
    nl = [self.cfg.GetMasterNode()]
7376
    return (nl, nl)
7377

    
7378
  def CheckPrereq(self):
7379
    pass
7380

    
7381
  def Exec(self, feedback_fn):
7382
    # Prepare jobs for migration instances
7383
    jobs = [
7384
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7385
                                 mode=self.op.mode,
7386
                                 live=self.op.live,
7387
                                 iallocator=self.op.iallocator,
7388
                                 target_node=self.op.target_node)]
7389
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7390
      ]
7391

    
7392
    # TODO: Run iallocator in this opcode and pass correct placement options to
7393
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7394
    # running the iallocator and the actual migration, a good consistency model
7395
    # will have to be found.
7396

    
7397
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7398
            frozenset([self.op.node_name]))
7399

    
7400
    return ResultWithJobs(jobs)
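    # Editor's note, not part of the original module: the list comprehension
    # above produces one single-opcode job per instance whose primary node is
    # self.op.node_name; with two such (made-up) instances it would look like
    #
    #   jobs == [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
    #            [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]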
7401

    
7402

    
7403
class TLMigrateInstance(Tasklet):
7404
  """Tasklet class for instance migration.
7405

7406
  @type live: boolean
7407
  @ivar live: whether the migration will be done live or non-live;
7408
      this variable is initialized only after CheckPrereq has run
7409
  @type cleanup: boolean
7410
  @ivar cleanup: Whether we are cleaning up after a failed migration
7411
  @type iallocator: string
7412
  @ivar iallocator: The iallocator used to determine target_node
7413
  @type target_node: string
7414
  @ivar target_node: If given, the target_node to reallocate the instance to
7415
  @type failover: boolean
7416
  @ivar failover: Whether operation results in failover or migration
7417
  @type fallback: boolean
7418
  @ivar fallback: Whether fallback to failover is allowed if migration not
7419
                  possible
7420
  @type ignore_consistency: boolean
7421
  @ivar ignore_consistency: Whether we should ignore consistency between source
7422
                            and target node
7423
  @type shutdown_timeout: int
7424
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7425

7426
  """
7427

    
7428
  # Constants
7429
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7430
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7431

    
7432
  def __init__(self, lu, instance_name, cleanup=False,
7433
               failover=False, fallback=False,
7434
               ignore_consistency=False,
7435
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7436
    """Initializes this class.
7437

7438
    """
7439
    Tasklet.__init__(self, lu)
7440

    
7441
    # Parameters
7442
    self.instance_name = instance_name
7443
    self.cleanup = cleanup
7444
    self.live = False # will be overridden later
7445
    self.failover = failover
7446
    self.fallback = fallback
7447
    self.ignore_consistency = ignore_consistency
7448
    self.shutdown_timeout = shutdown_timeout
7449

    
7450
  def CheckPrereq(self):
7451
    """Check prerequisites.
7452

7453
    This checks that the instance is in the cluster.
7454

7455
    """
7456
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7457
    instance = self.cfg.GetInstanceInfo(instance_name)
7458
    assert instance is not None
7459
    self.instance = instance
7460

    
7461
    if (not self.cleanup and
7462
        not instance.admin_state == constants.ADMINST_UP and
7463
        not self.failover and self.fallback):
7464
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7465
                      " switching to failover")
7466
      self.failover = True
7467

    
7468
    if instance.disk_template not in constants.DTS_MIRRORED:
7469
      if self.failover:
7470
        text = "failovers"
7471
      else:
7472
        text = "migrations"
7473
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7474
                                 " %s" % (instance.disk_template, text),
7475
                                 errors.ECODE_STATE)
7476

    
7477
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7478
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7479

    
7480
      if self.lu.op.iallocator:
7481
        self._RunAllocator()
7482
      else:
7483
        # We set self.target_node as it is required by
7484
        # BuildHooksEnv
7485
        self.target_node = self.lu.op.target_node
7486

    
7487
      # self.target_node is already populated, either directly or by the
7488
      # iallocator run
7489
      target_node = self.target_node
7490
      if self.target_node == instance.primary_node:
7491
        raise errors.OpPrereqError("Cannot migrate instance %s"
7492
                                   " to its primary (%s)" %
7493
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)
7494

    
7495
      if len(self.lu.tasklets) == 1:
7496
        # It is safe to release locks only when we're the only tasklet
7497
        # in the LU
7498
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7499
                      keep=[instance.primary_node, self.target_node])
7500

    
7501
    else:
7502
      secondary_nodes = instance.secondary_nodes
7503
      if not secondary_nodes:
7504
        raise errors.ConfigurationError("No secondary node but using"
7505
                                        " %s disk template" %
7506
                                        instance.disk_template)
7507
      target_node = secondary_nodes[0]
7508
      if self.lu.op.iallocator or (self.lu.op.target_node and
7509
                                   self.lu.op.target_node != target_node):
7510
        if self.failover:
7511
          text = "failed over"
7512
        else:
7513
          text = "migrated"
7514
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7515
                                   " be %s to arbitrary nodes"
7516
                                   " (neither an iallocator nor a target"
7517
                                   " node can be passed)" %
7518
                                   (instance.disk_template, text),
7519
                                   errors.ECODE_INVAL)
7520

    
7521
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7522

    
7523
    # check memory requirements on the secondary node
7524
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
7525
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7526
                           instance.name, i_be[constants.BE_MAXMEM],
7527
                           instance.hypervisor)
7528
    else:
7529
      self.lu.LogInfo("Not checking memory on the secondary node as"
7530
                      " instance will not be started")
7531

    
7532
    # check bridge existence
7533
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7534

    
7535
    if not self.cleanup:
7536
      _CheckNodeNotDrained(self.lu, target_node)
7537
      if not self.failover:
7538
        result = self.rpc.call_instance_migratable(instance.primary_node,
7539
                                                   instance)
7540
        if result.fail_msg and self.fallback:
7541
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7542
                          " failover")
7543
          self.failover = True
7544
        else:
7545
          result.Raise("Can't migrate, please use failover",
7546
                       prereq=True, ecode=errors.ECODE_STATE)
7547

    
7548
    assert not (self.failover and self.cleanup)
7549

    
7550
    if not self.failover:
7551
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7552
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7553
                                   " parameters are accepted",
7554
                                   errors.ECODE_INVAL)
7555
      if self.lu.op.live is not None:
7556
        if self.lu.op.live:
7557
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7558
        else:
7559
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7560
        # reset the 'live' parameter to None so that repeated
7561
        # invocations of CheckPrereq do not raise an exception
7562
        self.lu.op.live = None
7563
      elif self.lu.op.mode is None:
7564
        # read the default value from the hypervisor
7565
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7566
                                                skip_globals=False)
7567
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7568

    
7569
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7570
    else:
7571
      # Failover is never live
7572
      self.live = False
7573

    
7574
  def _RunAllocator(self):
7575
    """Run the allocator based on input opcode.
7576

7577
    """
7578
    ial = IAllocator(self.cfg, self.rpc,
7579
                     mode=constants.IALLOCATOR_MODE_RELOC,
7580
                     name=self.instance_name,
7581
                     # TODO See why hail breaks with a single node below
7582
                     relocate_from=[self.instance.primary_node,
7583
                                    self.instance.primary_node],
7584
                     )
7585

    
7586
    ial.Run(self.lu.op.iallocator)
7587

    
7588
    if not ial.success:
7589
      raise errors.OpPrereqError("Can't compute nodes using"
7590
                                 " iallocator '%s': %s" %
7591
                                 (self.lu.op.iallocator, ial.info),
7592
                                 errors.ECODE_NORES)
7593
    if len(ial.result) != ial.required_nodes:
7594
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7595
                                 " of nodes (%s), required %s" %
7596
                                 (self.lu.op.iallocator, len(ial.result),
7597
                                  ial.required_nodes), errors.ECODE_FAULT)
7598
    self.target_node = ial.result[0]
7599
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7600
                    self.instance_name, self.lu.op.iallocator,
7601
                    utils.CommaJoin(ial.result))
7602

    
7603
  def _WaitUntilSync(self):
7604
    """Poll with custom rpc for disk sync.
7605

7606
    This uses our own step-based rpc call.
7607

7608
    """
7609
    self.feedback_fn("* wait until resync is done")
7610
    all_done = False
7611
    while not all_done:
7612
      all_done = True
7613
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7614
                                            self.nodes_ip,
7615
                                            self.instance.disks)
7616
      min_percent = 100
7617
      for node, nres in result.items():
7618
        nres.Raise("Cannot resync disks on node %s" % node)
7619
        node_done, node_percent = nres.payload
7620
        all_done = all_done and node_done
7621
        if node_percent is not None:
7622
          min_percent = min(min_percent, node_percent)
7623
      if not all_done:
7624
        if min_percent < 100:
7625
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7626
        time.sleep(2)
7627

    
7628
  def _EnsureSecondary(self, node):
7629
    """Demote a node to secondary.
7630

7631
    """
7632
    self.feedback_fn("* switching node %s to secondary mode" % node)
7633

    
7634
    for dev in self.instance.disks:
7635
      self.cfg.SetDiskID(dev, node)
7636

    
7637
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7638
                                          self.instance.disks)
7639
    result.Raise("Cannot change disk to secondary on node %s" % node)
7640

    
7641
  def _GoStandalone(self):
7642
    """Disconnect from the network.
7643

7644
    """
7645
    self.feedback_fn("* changing into standalone mode")
7646
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7647
                                               self.instance.disks)
7648
    for node, nres in result.items():
7649
      nres.Raise("Cannot disconnect disks node %s" % node)
7650

    
7651
  def _GoReconnect(self, multimaster):
7652
    """Reconnect to the network.
7653

7654
    """
7655
    if multimaster:
7656
      msg = "dual-master"
7657
    else:
7658
      msg = "single-master"
7659
    self.feedback_fn("* changing disks into %s mode" % msg)
7660
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7661
                                           self.instance.disks,
7662
                                           self.instance.name, multimaster)
7663
    for node, nres in result.items():
7664
      nres.Raise("Cannot change disks config on node %s" % node)
7665

    
7666
  def _ExecCleanup(self):
7667
    """Try to cleanup after a failed migration.
7668

7669
    The cleanup is done by:
7670
      - check that the instance is running only on one node
7671
        (and update the config if needed)
7672
      - change disks on its secondary node to secondary
7673
      - wait until disks are fully synchronized
7674
      - disconnect from the network
7675
      - change disks into single-master mode
7676
      - wait again until disks are fully synchronized
7677

7678
    """
7679
    instance = self.instance
7680
    target_node = self.target_node
7681
    source_node = self.source_node
7682

    
7683
    # check running on only one node
7684
    self.feedback_fn("* checking where the instance actually runs"
7685
                     " (if this hangs, the hypervisor might be in"
7686
                     " a bad state)")
7687
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7688
    for node, result in ins_l.items():
7689
      result.Raise("Can't contact node %s" % node)
7690

    
7691
    runningon_source = instance.name in ins_l[source_node].payload
7692
    runningon_target = instance.name in ins_l[target_node].payload
7693

    
7694
    if runningon_source and runningon_target:
7695
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7696
                               " or the hypervisor is confused; you will have"
7697
                               " to ensure manually that it runs only on one"
7698
                               " and restart this operation")
7699

    
7700
    if not (runningon_source or runningon_target):
7701
      raise errors.OpExecError("Instance does not seem to be running at all;"
7702
                               " in this case it's safer to repair by"
7703
                               " running 'gnt-instance stop' to ensure disk"
7704
                               " shutdown, and then restarting it")
7705

    
7706
    if runningon_target:
7707
      # the migration has actually succeeded, we need to update the config
7708
      self.feedback_fn("* instance running on secondary node (%s),"
7709
                       " updating config" % target_node)
7710
      instance.primary_node = target_node
7711
      self.cfg.Update(instance, self.feedback_fn)
7712
      demoted_node = source_node
7713
    else:
7714
      self.feedback_fn("* instance confirmed to be running on its"
7715
                       " primary node (%s)" % source_node)
7716
      demoted_node = target_node
7717

    
7718
    if instance.disk_template in constants.DTS_INT_MIRROR:
7719
      self._EnsureSecondary(demoted_node)
7720
      try:
7721
        self._WaitUntilSync()
7722
      except errors.OpExecError:
7723
        # we ignore here errors, since if the device is standalone, it
7724
        # won't be able to sync
7725
        pass
7726
      self._GoStandalone()
7727
      self._GoReconnect(False)
7728
      self._WaitUntilSync()
7729

    
7730
    self.feedback_fn("* done")
7731

    
7732
  def _RevertDiskStatus(self):
7733
    """Try to revert the disk status after a failed migration.
7734

7735
    """
7736
    target_node = self.target_node
7737
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7738
      return
7739

    
7740
    try:
7741
      self._EnsureSecondary(target_node)
7742
      self._GoStandalone()
7743
      self._GoReconnect(False)
7744
      self._WaitUntilSync()
7745
    except errors.OpExecError, err:
7746
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7747
                         " please try to recover the instance manually;"
7748
                         " error '%s'" % str(err))
7749

    
7750
  def _AbortMigration(self):
7751
    """Call the hypervisor code to abort a started migration.
7752

7753
    """
7754
    instance = self.instance
7755
    target_node = self.target_node
7756
    source_node = self.source_node
7757
    migration_info = self.migration_info
7758

    
7759
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7760
                                                                 instance,
7761
                                                                 migration_info,
7762
                                                                 False)
7763
    abort_msg = abort_result.fail_msg
7764
    if abort_msg:
7765
      logging.error("Aborting migration failed on target node %s: %s",
7766
                    target_node, abort_msg)
7767
      # Don't raise an exception here, as we still have to try to revert the
7768
      # disk status, even if this step failed.
7769

    
7770
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7771
        instance, False, self.live)
7772
    abort_msg = abort_result.fail_msg
7773
    if abort_msg:
7774
      logging.error("Aborting migration failed on source node %s: %s",
7775
                    source_node, abort_msg)
7776

    
7777
  def _ExecMigration(self):
7778
    """Migrate an instance.
7779

7780
    The migrate is done by:
7781
      - change the disks into dual-master mode
7782
      - wait until disks are fully synchronized again
7783
      - migrate the instance
7784
      - change disks on the new secondary node (the old primary) to secondary
7785
      - wait until disks are fully synchronized
7786
      - change disks into single-master mode
7787

7788
    """
7789
    instance = self.instance
7790
    target_node = self.target_node
7791
    source_node = self.source_node
7792

    
7793
    # Check for hypervisor version mismatch and warn the user.
7794
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7795
                                       None, [self.instance.hypervisor])
7796
    for ninfo in nodeinfo.values():
7797
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
7798
                  ninfo.node)
7799
    (_, _, (src_info, )) = nodeinfo[source_node].payload
7800
    (_, _, (dst_info, )) = nodeinfo[target_node].payload
7801

    
7802
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7803
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7804
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7805
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7806
      if src_version != dst_version:
7807
        self.feedback_fn("* warning: hypervisor version mismatch between"
7808
                         " source (%s) and target (%s) node" %
7809
                         (src_version, dst_version))
7810

    
7811
    self.feedback_fn("* checking disk consistency between source and target")
7812
    for dev in instance.disks:
7813
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7814
        raise errors.OpExecError("Disk %s is degraded or not fully"
7815
                                 " synchronized on target node,"
7816
                                 " aborting migration" % dev.iv_name)
7817

    
7818
    # First get the migration information from the remote node
7819
    result = self.rpc.call_migration_info(source_node, instance)
7820
    msg = result.fail_msg
7821
    if msg:
7822
      log_err = ("Failed fetching source migration information from %s: %s" %
7823
                 (source_node, msg))
7824
      logging.error(log_err)
7825
      raise errors.OpExecError(log_err)
7826

    
7827
    self.migration_info = migration_info = result.payload
7828

    
7829
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7830
      # Then switch the disks to master/master mode
7831
      self._EnsureSecondary(target_node)
7832
      self._GoStandalone()
7833
      self._GoReconnect(True)
7834
      self._WaitUntilSync()
7835

    
7836
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7837
    result = self.rpc.call_accept_instance(target_node,
7838
                                           instance,
7839
                                           migration_info,
7840
                                           self.nodes_ip[target_node])
7841

    
7842
    msg = result.fail_msg
7843
    if msg:
7844
      logging.error("Instance pre-migration failed, trying to revert"
7845
                    " disk status: %s", msg)
7846
      self.feedback_fn("Pre-migration failed, aborting")
7847
      self._AbortMigration()
7848
      self._RevertDiskStatus()
7849
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7850
                               (instance.name, msg))
7851

    
7852
    self.feedback_fn("* migrating instance to %s" % target_node)
7853
    result = self.rpc.call_instance_migrate(source_node, instance,
7854
                                            self.nodes_ip[target_node],
7855
                                            self.live)
7856
    msg = result.fail_msg
7857
    if msg:
7858
      logging.error("Instance migration failed, trying to revert"
7859
                    " disk status: %s", msg)
7860
      self.feedback_fn("Migration failed, aborting")
7861
      self._AbortMigration()
7862
      self._RevertDiskStatus()
7863
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7864
                               (instance.name, msg))
7865

    
7866
    self.feedback_fn("* starting memory transfer")
7867
    last_feedback = time.time()
7868
    while True:
7869
      result = self.rpc.call_instance_get_migration_status(source_node,
7870
                                                           instance)
7871
      msg = result.fail_msg
7872
      ms = result.payload   # MigrationStatus instance
7873
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7874
        logging.error("Instance migration failed, trying to revert"
7875
                      " disk status: %s", msg)
7876
        self.feedback_fn("Migration failed, aborting")
7877
        self._AbortMigration()
7878
        self._RevertDiskStatus()
7879
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7880
                                 (instance.name, msg))
7881

    
7882
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7883
        self.feedback_fn("* memory transfer complete")
7884
        break
7885

    
7886
      if (utils.TimeoutExpired(last_feedback,
7887
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7888
          ms.transferred_ram is not None):
7889
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7890
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7891
        last_feedback = time.time()
7892

    
7893
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7894

    
7895
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7896
                                                           instance,
7897
                                                           True,
7898
                                                           self.live)
7899
    msg = result.fail_msg
7900
    if msg:
7901
      logging.error("Instance migration succeeded, but finalization failed"
7902
                    " on the source node: %s", msg)
7903
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7904
                               msg)
7905

    
7906
    instance.primary_node = target_node
7907

    
7908
    # distribute new instance config to the other nodes
7909
    self.cfg.Update(instance, self.feedback_fn)
7910

    
7911
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7912
                                                           instance,
7913
                                                           migration_info,
7914
                                                           True)
7915
    msg = result.fail_msg
7916
    if msg:
7917
      logging.error("Instance migration succeeded, but finalization failed"
7918
                    " on the target node: %s", msg)
7919
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7920
                               msg)
7921

    
7922
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7923
      self._EnsureSecondary(source_node)
7924
      self._WaitUntilSync()
7925
      self._GoStandalone()
7926
      self._GoReconnect(False)
7927
      self._WaitUntilSync()
7928

    
7929
    self.feedback_fn("* done")
7930

    
7931
  def _ExecFailover(self):
7932
    """Failover an instance.
7933

7934
    The failover is done by shutting it down on its present node and
7935
    starting it on the secondary.
7936

7937
    """
7938
    instance = self.instance
7939
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7940

    
7941
    source_node = instance.primary_node
7942
    target_node = self.target_node
7943

    
7944
    if instance.admin_state == constants.ADMINST_UP:
7945
      self.feedback_fn("* checking disk consistency between source and target")
7946
      for dev in instance.disks:
7947
        # for drbd, these are drbd over lvm
7948
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7949
          if primary_node.offline:
7950
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7951
                             " target node %s" %
7952
                             (primary_node.name, dev.iv_name, target_node))
7953
          elif not self.ignore_consistency:
7954
            raise errors.OpExecError("Disk %s is degraded on target node,"
7955
                                     " aborting failover" % dev.iv_name)
7956
    else:
7957
      self.feedback_fn("* not checking disk consistency as instance is not"
7958
                       " running")
7959

    
7960
    self.feedback_fn("* shutting down instance on source node")
7961
    logging.info("Shutting down instance %s on node %s",
7962
                 instance.name, source_node)
7963

    
7964
    result = self.rpc.call_instance_shutdown(source_node, instance,
7965
                                             self.shutdown_timeout)
7966
    msg = result.fail_msg
7967
    if msg:
7968
      if self.ignore_consistency or primary_node.offline:
7969
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7970
                           " proceeding anyway; please make sure node"
7971
                           " %s is down; error details: %s",
7972
                           instance.name, source_node, source_node, msg)
7973
      else:
7974
        raise errors.OpExecError("Could not shutdown instance %s on"
7975
                                 " node %s: %s" %
7976
                                 (instance.name, source_node, msg))
7977

    
7978
    self.feedback_fn("* deactivating the instance's disks on source node")
7979
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7980
      raise errors.OpExecError("Can't shut down the instance's disks")
7981

    
7982
    instance.primary_node = target_node
7983
    # distribute new instance config to the other nodes
7984
    self.cfg.Update(instance, self.feedback_fn)
7985

    
7986
    # Only start the instance if it's marked as up
7987
    if instance.admin_state == constants.ADMINST_UP:
7988
      self.feedback_fn("* activating the instance's disks on target node %s" %
7989
                       target_node)
7990
      logging.info("Starting instance %s on node %s",
7991
                   instance.name, target_node)
7992

    
7993
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7994
                                           ignore_secondaries=True)
7995
      if not disks_ok:
7996
        _ShutdownInstanceDisks(self.lu, instance)
7997
        raise errors.OpExecError("Can't activate the instance's disks")
7998

    
7999
      self.feedback_fn("* starting the instance on the target node %s" %
8000
                       target_node)
8001
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
8002
                                            False)
8003
      msg = result.fail_msg
8004
      if msg:
8005
        _ShutdownInstanceDisks(self.lu, instance)
8006
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8007
                                 (instance.name, target_node, msg))
8008

    
8009
  def Exec(self, feedback_fn):
8010
    """Perform the migration.
8011

8012
    """
8013
    self.feedback_fn = feedback_fn
8014
    self.source_node = self.instance.primary_node
8015

    
8016
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8017
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
8018
      self.target_node = self.instance.secondary_nodes[0]
8019
      # Otherwise self.target_node has been populated either
8020
      # directly, or through an iallocator.
8021

    
8022
    self.all_nodes = [self.source_node, self.target_node]
8023
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8024
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
8025

    
8026
    if self.failover:
8027
      feedback_fn("Failover instance %s" % self.instance.name)
8028
      self._ExecFailover()
8029
    else:
8030
      feedback_fn("Migrating instance %s" % self.instance.name)
8031

    
8032
      if self.cleanup:
8033
        return self._ExecCleanup()
8034
      else:
8035
        return self._ExecMigration()
8036

    
8037

    
8038
def _CreateBlockDev(lu, node, instance, device, force_create,
8039
                    info, force_open):
8040
  """Create a tree of block devices on a given node.
8041

8042
  If this device type has to be created on secondaries, create it and
8043
  all its children.
8044

8045
  If not, just recurse to children keeping the same 'force' value.
8046

8047
  @param lu: the lu on whose behalf we execute
8048
  @param node: the node on which to create the device
8049
  @type instance: L{objects.Instance}
8050
  @param instance: the instance which owns the device
8051
  @type device: L{objects.Disk}
8052
  @param device: the device to create
8053
  @type force_create: boolean
8054
  @param force_create: whether to force creation of this device; this
8055
      will be changed to True whenever we find a device which has
8056
      CreateOnSecondary() attribute
8057
  @param info: the extra 'metadata' we should attach to the device
8058
      (this will be represented as a LVM tag)
8059
  @type force_open: boolean
8060
  @param force_open: this parameter will be passed to the
8061
      L{backend.BlockdevCreate} function where it specifies
8062
      whether we run on primary or not, and it affects both
8063
      the child assembly and the device's own Open() execution
8064

8065
  """
8066
  if device.CreateOnSecondary():
8067
    force_create = True
8068

    
8069
  if device.children:
8070
    for child in device.children:
8071
      _CreateBlockDev(lu, node, instance, child, force_create,
8072
                      info, force_open)
8073

    
8074
  if not force_create:
8075
    return
8076

    
8077
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
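# Editor's sketch, not part of the original module: the recursion above always
# visits children before their parent, and force_create, once switched on by a
# device that reports CreateOnSecondary(), is propagated to all devices below
# it.  For a DRBD8 disk (two LV children) a call like
#
#   _CreateBlockDev(lu, node, instance, drbd_dev, force_create, info,
#                   force_open)
#
# therefore creates the data LV, then the metadata LV, and finally the DRBD8
# device itself, each through _CreateSingleBlockDev (assuming force_create
# ends up true for them).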
8078

    
8079

    
8080
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8081
  """Create a single block device on a given node.
8082

8083
  This will not recurse over children of the device, so they must be
8084
  created in advance.
8085

8086
  @param lu: the lu on whose behalf we execute
8087
  @param node: the node on which to create the device
8088
  @type instance: L{objects.Instance}
8089
  @param instance: the instance which owns the device
8090
  @type device: L{objects.Disk}
8091
  @param device: the device to create
8092
  @param info: the extra 'metadata' we should attach to the device
8093
      (this will be represented as a LVM tag)
8094
  @type force_open: boolean
8095
  @param force_open: this parameter will be passed to the
8096
      L{backend.BlockdevCreate} function where it specifies
8097
      whether we run on primary or not, and it affects both
8098
      the child assembly and the device's own Open() execution
8099

8100
  """
8101
  lu.cfg.SetDiskID(device, node)
8102
  result = lu.rpc.call_blockdev_create(node, device, device.size,
8103
                                       instance.name, force_open, info)
8104
  result.Raise("Can't create block device %s on"
8105
               " node %s for instance %s" % (device, node, instance.name))
8106
  if device.physical_id is None:
8107
    device.physical_id = result.payload
8108

    
8109

    
8110
def _GenerateUniqueNames(lu, exts):
8111
  """Generate a suitable LV name.
8112

8113
  This will generate a logical volume name for the given instance.
8114

8115
  """
8116
  results = []
8117
  for val in exts:
8118
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8119
    results.append("%s%s" % (new_id, val))
8120
  return results
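# Editor's sketch, not part of the original module: each suffix is paired with
# its own freshly reserved unique ID, so a call like
#
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#
# returns two independent names of the form "<unique-id>.disk0" and
# "<unique-id>.disk1" (the concrete IDs come from the cluster configuration
# and are shown here only schematically).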
8121

    
8122

    
8123
def _ComputeLDParams(disk_template, disk_params):
8124
  """Computes Logical Disk parameters from Disk Template parameters.
8125

8126
  @type disk_template: string
8127
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8128
  @type disk_params: dict
8129
  @param disk_params: disk template parameters; dict(template_name -> parameters)
8130
  @rtype: list(dict)
8131
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8132
    contains the LD parameters of the node. The tree is flattened in-order.
8133

8134
  """
8135
  if disk_template not in constants.DISK_TEMPLATES:
8136
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8137

    
8138
  result = list()
8139
  dt_params = disk_params[disk_template]
8140
  if disk_template == constants.DT_DRBD8:
8141
    drbd_params = {
8142
      constants.RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8143
      constants.BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8144
      constants.NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8145
      }
8146

    
8147
    drbd_params = \
8148
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8149
                       drbd_params)
8150

    
8151
    result.append(drbd_params)
8152

    
8153
    # data LV
8154
    data_params = {
8155
      constants.STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8156
      }
8157
    data_params = \
8158
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8159
                       data_params)
8160
    result.append(data_params)
8161

    
8162
    # metadata LV
8163
    meta_params = {
8164
      constants.STRIPES: dt_params[constants.DRBD_META_STRIPES],
8165
      }
8166
    meta_params = \
8167
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8168
                       meta_params)
8169
    result.append(meta_params)
8170

    
8171
  elif (disk_template == constants.DT_FILE or
8172
        disk_template == constants.DT_SHARED_FILE):
8173
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8174

    
8175
  elif disk_template == constants.DT_PLAIN:
8176
    params = {
8177
      constants.STRIPES: dt_params[constants.LV_STRIPES],
8178
      }
8179
    params = \
8180
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8181
                       params)
8182
    result.append(params)
8183

    
8184
  elif disk_template == constants.DT_BLOCK:
8185
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8186

    
8187
  return result
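# Editor's sketch, not part of the original module: the returned list mirrors
# the in-order flattening of the disk tree that will be built from it, e.g.
#
#   _ComputeLDParams(constants.DT_PLAIN, disk_params)
#     -> [<LV parameters>]
#   _ComputeLDParams(constants.DT_DRBD8, disk_params)
#     -> [<DRBD8 parameters>, <data LV parameters>, <meta LV parameters>]
#
# where every entry is the matching DISK_LD_DEFAULTS dict filled with the
# template-level overrides (resync rate, barriers, stripes, ...).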
8188

    
8189

    
8190
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8191
                         iv_name, p_minor, s_minor, drbd_params, data_params,
8192
                         meta_params):
8193
  """Generate a drbd8 device complete with its children.
8194

8195
  """
8196
  assert len(vgnames) == len(names) == 2
8197
  port = lu.cfg.AllocatePort()
8198
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8199

    
8200
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8201
                          logical_id=(vgnames[0], names[0]),
8202
                          params=data_params)
8203
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8204
                          logical_id=(vgnames[1], names[1]),
8205
                          params=meta_params)
8206
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8207
                          logical_id=(primary, secondary, port,
8208
                                      p_minor, s_minor,
8209
                                      shared_secret),
8210
                          children=[dev_data, dev_meta],
8211
                          iv_name=iv_name, params=drbd_params)
8212
  return drbd_dev
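# Editor's sketch, not part of the original module: the returned value is a
# three-node tree of objects.Disk instances,
#
#   LD_DRBD8   size=size            logical_id=(primary, secondary, port,
#                                               p_minor, s_minor, shared_secret)
#     |- LD_LV  size=size            logical_id=(vgnames[0], names[0])   # data
#     `- LD_LV  size=DRBD_META_SIZE  logical_id=(vgnames[1], names[1])   # meta
#
# with the TCP port and the shared secret freshly allocated from the cluster
# configuration.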
8213

    
8214

    
8215
def _GenerateDiskTemplate(lu, template_name,
8216
                          instance_name, primary_node,
8217
                          secondary_nodes, disk_info,
8218
                          file_storage_dir, file_driver,
8219
                          base_index, feedback_fn, disk_params):
8220
  """Generate the entire disk layout for a given template type.
8221

8222
  """
8223
  # TODO: compute space requirements
8224

    
8225
  vgname = lu.cfg.GetVGName()
8226
  disk_count = len(disk_info)
8227
  disks = []
8228
  ld_params = _ComputeLDParams(template_name, disk_params)
8229
  if template_name == constants.DT_DISKLESS:
8230
    pass
8231
  elif template_name == constants.DT_PLAIN:
8232
    if len(secondary_nodes) != 0:
8233
      raise errors.ProgrammerError("Wrong template configuration")
8234

    
8235
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8236
                                      for i in range(disk_count)])
8237
    for idx, disk in enumerate(disk_info):
8238
      disk_index = idx + base_index
8239
      vg = disk.get(constants.IDISK_VG, vgname)
8240
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8241
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
8242
                              size=disk[constants.IDISK_SIZE],
8243
                              logical_id=(vg, names[idx]),
8244
                              iv_name="disk/%d" % disk_index,
8245
                              mode=disk[constants.IDISK_MODE],
8246
                              params=ld_params[0])
8247
      disks.append(disk_dev)
8248
  elif template_name == constants.DT_DRBD8:
8249
    drbd_params, data_params, meta_params = ld_params
8250
    if len(secondary_nodes) != 1:
8251
      raise errors.ProgrammerError("Wrong template configuration")
8252
    remote_node = secondary_nodes[0]
8253
    minors = lu.cfg.AllocateDRBDMinor(
8254
      [primary_node, remote_node] * len(disk_info), instance_name)
8255

    
8256
    names = []
8257
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8258
                                               for i in range(disk_count)]):
8259
      names.append(lv_prefix + "_data")
8260
      names.append(lv_prefix + "_meta")
8261
    for idx, disk in enumerate(disk_info):
8262
      disk_index = idx + base_index
8263
      data_vg = disk.get(constants.IDISK_VG, vgname)
8264
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8265
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8266
                                      disk[constants.IDISK_SIZE],
8267
                                      [data_vg, meta_vg],
8268
                                      names[idx * 2:idx * 2 + 2],
8269
                                      "disk/%d" % disk_index,
8270
                                      minors[idx * 2], minors[idx * 2 + 1],
8271
                                      drbd_params, data_params, meta_params)
8272
      disk_dev.mode = disk[constants.IDISK_MODE]
8273
      disks.append(disk_dev)
8274
  elif template_name == constants.DT_FILE:
8275
    if len(secondary_nodes) != 0:
8276
      raise errors.ProgrammerError("Wrong template configuration")
8277

    
8278
    opcodes.RequireFileStorage()
8279

    
8280
    for idx, disk in enumerate(disk_info):
8281
      disk_index = idx + base_index
8282
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8283
                              size=disk[constants.IDISK_SIZE],
8284
                              iv_name="disk/%d" % disk_index,
8285
                              logical_id=(file_driver,
8286
                                          "%s/disk%d" % (file_storage_dir,
8287
                                                         disk_index)),
8288
                              mode=disk[constants.IDISK_MODE],
8289
                              params=ld_params[0])
8290
      disks.append(disk_dev)
8291
  elif template_name == constants.DT_SHARED_FILE:
8292
    if len(secondary_nodes) != 0:
8293
      raise errors.ProgrammerError("Wrong template configuration")
8294

    
8295
    opcodes.RequireSharedFileStorage()
8296

    
8297
    for idx, disk in enumerate(disk_info):
8298
      disk_index = idx + base_index
8299
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8300
                              size=disk[constants.IDISK_SIZE],
8301
                              iv_name="disk/%d" % disk_index,
8302
                              logical_id=(file_driver,
8303
                                          "%s/disk%d" % (file_storage_dir,
8304
                                                         disk_index)),
8305
                              mode=disk[constants.IDISK_MODE],
8306
                              params=ld_params[0])
8307
      disks.append(disk_dev)
8308
  elif template_name == constants.DT_BLOCK:
8309
    if len(secondary_nodes) != 0:
8310
      raise errors.ProgrammerError("Wrong template configuration")
8311

    
8312
    for idx, disk in enumerate(disk_info):
8313
      disk_index = idx + base_index
8314
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8315
                              size=disk[constants.IDISK_SIZE],
8316
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8317
                                          disk[constants.IDISK_ADOPT]),
8318
                              iv_name="disk/%d" % disk_index,
8319
                              mode=disk[constants.IDISK_MODE],
8320
                              params=ld_params[0])
8321
      disks.append(disk_dev)
8322

    
8323
  else:
8324
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8325
  return disks
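
# Illustrative sketch (hypothetical values, not taken from a real cluster):
# each entry of the disk_info argument above is a dict keyed by the IDISK_*
# constants used in this function, for example:
#
#   disk_info = [{constants.IDISK_SIZE: 10240,               # MiB
#                 constants.IDISK_MODE: constants.DISK_RDWR,
#                 constants.IDISK_VG: "xenvg"}]              # optional
#
# DT_DRBD8 additionally honours a per-disk IDISK_METAVG, and DT_BLOCK requires
# IDISK_ADOPT with the path of the device to adopt.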


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
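
# Worked example (hypothetical numbers): after writing 512 MiB of a 2048 MiB
# disk in 30 seconds, the observed rate is 30.0 / 512 seconds per MiB, so
#   _CalcEta(30.0, 512, 2048) == (2048 - 512) * (30.0 / 512) == 90.0
# i.e. about 90 seconds remain, assuming the write rate stays constant.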


def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)
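
# Sketch of the chunk-size arithmetic above, assuming (hypothetically) that
# MIN_WIPE_CHUNK_PERCENT is 10 and MAX_WIPE_CHUNK is 1024: a 4096 MiB disk is
# wiped in chunks of int(min(1024, 4096 / 100.0 * 10)) == 409 MiB, while very
# large disks are capped at 1024 MiB per call_blockdev_wipe request.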


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      # accumulate per volume group, keyed by the disk's VG name
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
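
# Illustrative result (hypothetical input): for two 1024 MiB disks in volume
# group "xenvg" and one 512 MiB disk in "altvg", DT_DRBD8 yields
#   {"xenvg": 2 * (1024 + DRBD_META_SIZE), "altvg": 512 + DRBD_META_SIZE}
# i.e. {"xenvg": 2304, "altvg": 640}, while DT_PLAIN gives
# {"xenvg": 2048, "altvg": 512}.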


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
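
# Unlike the per-VG variant above, this helper returns a single number (or
# None/0 where no local LVM space is needed): the same two 1024 MiB disks
# (hypothetical sizes) require 2 * (1024 + DRBD_META_SIZE) == 2304 MiB under
# DT_DRBD8 and 2048 MiB under DT_PLAIN.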


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8614

    
8615

    
8616
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8617
  """OS parameters validation.
8618

8619
  @type lu: L{LogicalUnit}
8620
  @param lu: the logical unit for which we check
8621
  @type required: boolean
8622
  @param required: whether the validation should fail if the OS is not
8623
      found
8624
  @type nodenames: list
8625
  @param nodenames: the list of nodes on which we should check
8626
  @type osname: string
8627
  @param osname: the name of the hypervisor we should use
8628
  @type osparams: dict
8629
  @param osparams: the parameters which we need to check
8630
  @raise errors.OpPrereqError: if the parameters are not valid
8631

8632
  """
8633
  nodenames = _FilterVmNodes(lu, nodenames)
8634
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8635
                                   [constants.OS_VALIDATE_PARAMETERS],
8636
                                   osparams)
8637
  for node, nres in result.items():
8638
    # we don't check for offline cases since this should be run only
8639
    # against the master node and/or an instance's nodes
8640
    nres.Raise("OS Parameters validation failed on node %s" % node)
8641
    if not nres.payload:
8642
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8643
                 osname, node)
8644

    
8645

    
8646
class LUInstanceCreate(LogicalUnit):
8647
  """Create an instance.
8648

8649
  """
8650
  HPATH = "instance-add"
8651
  HTYPE = constants.HTYPE_INSTANCE
8652
  REQ_BGL = False
8653

    
8654
  def CheckArguments(self):
8655
    """Check arguments.
8656

8657
    """
8658
    # do not require name_check to ease forward/backward compatibility
8659
    # for tools
8660
    if self.op.no_install and self.op.start:
8661
      self.LogInfo("No-installation mode selected, disabling startup")
8662
      self.op.start = False
8663
    # validate/normalize the instance name
8664
    self.op.instance_name = \
8665
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8666

    
8667
    if self.op.ip_check and not self.op.name_check:
8668
      # TODO: make the ip check more flexible and not depend on the name check
8669
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8670
                                 " check", errors.ECODE_INVAL)
8671

    
8672
    # check nics' parameter names
8673
    for nic in self.op.nics:
8674
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8675

    
8676
    # check disks. parameter names and consistent adopt/no-adopt strategy
8677
    has_adopt = has_no_adopt = False
8678
    for disk in self.op.disks:
8679
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8680
      if constants.IDISK_ADOPT in disk:
8681
        has_adopt = True
8682
      else:
8683
        has_no_adopt = True
8684
    if has_adopt and has_no_adopt:
8685
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8686
                                 errors.ECODE_INVAL)
8687
    if has_adopt:
8688
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8689
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8690
                                   " '%s' disk template" %
8691
                                   self.op.disk_template,
8692
                                   errors.ECODE_INVAL)
8693
      if self.op.iallocator is not None:
8694
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8695
                                   " iallocator script", errors.ECODE_INVAL)
8696
      if self.op.mode == constants.INSTANCE_IMPORT:
8697
        raise errors.OpPrereqError("Disk adoption not allowed for"
8698
                                   " instance import", errors.ECODE_INVAL)
8699
    else:
8700
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8701
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8702
                                   " but no 'adopt' parameter given" %
8703
                                   self.op.disk_template,
8704
                                   errors.ECODE_INVAL)
8705

    
8706
    self.adopt_disks = has_adopt
8707

    
8708
    # instance name verification
8709
    if self.op.name_check:
8710
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8711
      self.op.instance_name = self.hostname1.name
8712
      # used in CheckPrereq for ip ping check
8713
      self.check_ip = self.hostname1.ip
8714
    else:
8715
      self.check_ip = None
8716

    
8717
    # file storage checks
8718
    if (self.op.file_driver and
8719
        not self.op.file_driver in constants.FILE_DRIVER):
8720
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8721
                                 self.op.file_driver, errors.ECODE_INVAL)
8722

    
8723
    if self.op.disk_template == constants.DT_FILE:
8724
      opcodes.RequireFileStorage()
8725
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8726
      opcodes.RequireSharedFileStorage()
8727

    
8728
    ### Node/iallocator related checks
8729
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8730

    
8731
    if self.op.pnode is not None:
8732
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8733
        if self.op.snode is None:
8734
          raise errors.OpPrereqError("The networked disk templates need"
8735
                                     " a mirror node", errors.ECODE_INVAL)
8736
      elif self.op.snode:
8737
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8738
                        " template")
8739
        self.op.snode = None
8740

    
8741
    self._cds = _GetClusterDomainSecret()
8742

    
8743
    if self.op.mode == constants.INSTANCE_IMPORT:
8744
      # On import force_variant must be True, because if we forced it at
8745
      # initial install, our only chance when importing it back is that it
8746
      # works again!
8747
      self.op.force_variant = True
8748

    
8749
      if self.op.no_install:
8750
        self.LogInfo("No-installation mode has no effect during import")
8751

    
8752
    elif self.op.mode == constants.INSTANCE_CREATE:
8753
      if self.op.os_type is None:
8754
        raise errors.OpPrereqError("No guest OS specified",
8755
                                   errors.ECODE_INVAL)
8756
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8757
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8758
                                   " installation" % self.op.os_type,
8759
                                   errors.ECODE_STATE)
8760
      if self.op.disk_template is None:
8761
        raise errors.OpPrereqError("No disk template specified",
8762
                                   errors.ECODE_INVAL)
8763

    
8764
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8765
      # Check handshake to ensure both clusters have the same domain secret
8766
      src_handshake = self.op.source_handshake
8767
      if not src_handshake:
8768
        raise errors.OpPrereqError("Missing source handshake",
8769
                                   errors.ECODE_INVAL)
8770

    
8771
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8772
                                                           src_handshake)
8773
      if errmsg:
8774
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8775
                                   errors.ECODE_INVAL)
8776

    
8777
      # Load and check source CA
8778
      self.source_x509_ca_pem = self.op.source_x509_ca
8779
      if not self.source_x509_ca_pem:
8780
        raise errors.OpPrereqError("Missing source X509 CA",
8781
                                   errors.ECODE_INVAL)
8782

    
8783
      try:
8784
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8785
                                                    self._cds)
8786
      except OpenSSL.crypto.Error, err:
8787
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8788
                                   (err, ), errors.ECODE_INVAL)
8789

    
8790
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8791
      if errcode is not None:
8792
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8793
                                   errors.ECODE_INVAL)
8794

    
8795
      self.source_x509_ca = cert
8796

    
8797
      src_instance_name = self.op.source_instance_name
8798
      if not src_instance_name:
8799
        raise errors.OpPrereqError("Missing source instance name",
8800
                                   errors.ECODE_INVAL)
8801

    
8802
      self.source_instance_name = \
8803
          netutils.GetHostname(name=src_instance_name).name
8804

    
8805
    else:
8806
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8807
                                 self.op.mode, errors.ECODE_INVAL)
8808

    
8809
  def ExpandNames(self):
8810
    """ExpandNames for CreateInstance.
8811

8812
    Figure out the right locks for instance creation.
8813

8814
    """
8815
    self.needed_locks = {}
8816

    
8817
    instance_name = self.op.instance_name
8818
    # this is just a preventive check, but someone might still add this
8819
    # instance in the meantime, and creation will fail at lock-add time
8820
    if instance_name in self.cfg.GetInstanceList():
8821
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8822
                                 instance_name, errors.ECODE_EXISTS)
8823

    
8824
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8825

    
8826
    if self.op.iallocator:
8827
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8828
      # specifying a group on instance creation and then selecting nodes from
8829
      # that group
8830
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8831
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8832
    else:
8833
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8834
      nodelist = [self.op.pnode]
8835
      if self.op.snode is not None:
8836
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8837
        nodelist.append(self.op.snode)
8838
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8839
      # Lock resources of instance's primary and secondary nodes (copy to
8840
      # prevent accidential modification)
8841
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8842

    
8843
    # in case of import lock the source node too
8844
    if self.op.mode == constants.INSTANCE_IMPORT:
8845
      src_node = self.op.src_node
8846
      src_path = self.op.src_path
8847

    
8848
      if src_path is None:
8849
        self.op.src_path = src_path = self.op.instance_name
8850

    
8851
      if src_node is None:
8852
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8853
        self.op.src_node = None
8854
        if os.path.isabs(src_path):
8855
          raise errors.OpPrereqError("Importing an instance from a path"
8856
                                     " requires a source node option",
8857
                                     errors.ECODE_INVAL)
8858
      else:
8859
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8860
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8861
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8862
        if not os.path.isabs(src_path):
8863
          self.op.src_path = src_path = \
8864
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8865

    
8866
  def _RunAllocator(self):
8867
    """Run the allocator based on input opcode.
8868

8869
    """
8870
    nics = [n.ToDict() for n in self.nics]
8871
    ial = IAllocator(self.cfg, self.rpc,
8872
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8873
                     name=self.op.instance_name,
8874
                     disk_template=self.op.disk_template,
8875
                     tags=self.op.tags,
8876
                     os=self.op.os_type,
8877
                     vcpus=self.be_full[constants.BE_VCPUS],
8878
                     memory=self.be_full[constants.BE_MAXMEM],
8879
                     disks=self.disks,
8880
                     nics=nics,
8881
                     hypervisor=self.op.hypervisor,
8882
                     )
8883

    
8884
    ial.Run(self.op.iallocator)
8885

    
8886
    if not ial.success:
8887
      raise errors.OpPrereqError("Can't compute nodes using"
8888
                                 " iallocator '%s': %s" %
8889
                                 (self.op.iallocator, ial.info),
8890
                                 errors.ECODE_NORES)
8891
    if len(ial.result) != ial.required_nodes:
8892
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8893
                                 " of nodes (%s), required %s" %
8894
                                 (self.op.iallocator, len(ial.result),
8895
                                  ial.required_nodes), errors.ECODE_FAULT)
8896
    self.op.pnode = ial.result[0]
8897
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8898
                 self.op.instance_name, self.op.iallocator,
8899
                 utils.CommaJoin(ial.result))
8900
    if ial.required_nodes == 2:
8901
      self.op.snode = ial.result[1]
8902

    
8903
  def BuildHooksEnv(self):
8904
    """Build hooks env.
8905

8906
    This runs on master, primary and secondary nodes of the instance.
8907

8908
    """
8909
    env = {
8910
      "ADD_MODE": self.op.mode,
8911
      }
8912
    if self.op.mode == constants.INSTANCE_IMPORT:
8913
      env["SRC_NODE"] = self.op.src_node
8914
      env["SRC_PATH"] = self.op.src_path
8915
      env["SRC_IMAGES"] = self.src_images
8916

    
8917
    env.update(_BuildInstanceHookEnv(
8918
      name=self.op.instance_name,
8919
      primary_node=self.op.pnode,
8920
      secondary_nodes=self.secondaries,
8921
      status=self.op.start,
8922
      os_type=self.op.os_type,
8923
      minmem=self.be_full[constants.BE_MINMEM],
8924
      maxmem=self.be_full[constants.BE_MAXMEM],
8925
      vcpus=self.be_full[constants.BE_VCPUS],
8926
      nics=_NICListToTuple(self, self.nics),
8927
      disk_template=self.op.disk_template,
8928
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8929
             for d in self.disks],
8930
      bep=self.be_full,
8931
      hvp=self.hv_full,
8932
      hypervisor_name=self.op.hypervisor,
8933
      tags=self.op.tags,
8934
    ))
8935

    
8936
    return env
8937

    
8938
  def BuildHooksNodes(self):
8939
    """Build hooks nodes.
8940

8941
    """
8942
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8943
    return nl, nl
8944

    
8945
  def _ReadExportInfo(self):
8946
    """Reads the export information from disk.
8947

8948
    It will override the opcode source node and path with the actual
8949
    information, if these two were not specified before.
8950

8951
    @return: the export information
8952

8953
    """
8954
    assert self.op.mode == constants.INSTANCE_IMPORT
8955

    
8956
    src_node = self.op.src_node
8957
    src_path = self.op.src_path
8958

    
8959
    if src_node is None:
8960
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8961
      exp_list = self.rpc.call_export_list(locked_nodes)
8962
      found = False
8963
      for node in exp_list:
8964
        if exp_list[node].fail_msg:
8965
          continue
8966
        if src_path in exp_list[node].payload:
8967
          found = True
8968
          self.op.src_node = src_node = node
8969
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8970
                                                       src_path)
8971
          break
8972
      if not found:
8973
        raise errors.OpPrereqError("No export found for relative path %s" %
8974
                                    src_path, errors.ECODE_INVAL)
8975

    
8976
    _CheckNodeOnline(self, src_node)
8977
    result = self.rpc.call_export_info(src_node, src_path)
8978
    result.Raise("No export or invalid export found in dir %s" % src_path)
8979

    
8980
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8981
    if not export_info.has_section(constants.INISECT_EXP):
8982
      raise errors.ProgrammerError("Corrupted export config",
8983
                                   errors.ECODE_ENVIRON)
8984

    
8985
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8986
    if (int(ei_version) != constants.EXPORT_VERSION):
8987
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8988
                                 (ei_version, constants.EXPORT_VERSION),
8989
                                 errors.ECODE_ENVIRON)
8990
    return export_info
8991

    
8992
  def _ReadExportParams(self, einfo):
8993
    """Use export parameters as defaults.
8994

8995
    In case the opcode doesn't specify (as in override) some instance
8996
    parameters, then try to use them from the export information, if
8997
    that declares them.
8998

8999
    """
9000
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9001

    
9002
    if self.op.disk_template is None:
9003
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
9004
        self.op.disk_template = einfo.get(constants.INISECT_INS,
9005
                                          "disk_template")
9006
        if self.op.disk_template not in constants.DISK_TEMPLATES:
9007
          raise errors.OpPrereqError("Disk template specified in configuration"
9008
                                     " file is not one of the allowed values:"
9009
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
9010
      else:
9011
        raise errors.OpPrereqError("No disk template specified and the export"
9012
                                   " is missing the disk_template information",
9013
                                   errors.ECODE_INVAL)
9014

    
9015
    if not self.op.disks:
9016
      disks = []
9017
      # TODO: import the disk iv_name too
9018
      for idx in range(constants.MAX_DISKS):
9019
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9020
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9021
          disks.append({constants.IDISK_SIZE: disk_sz})
9022
      self.op.disks = disks
9023
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
9024
        raise errors.OpPrereqError("No disk info specified and the export"
9025
                                   " is missing the disk information",
9026
                                   errors.ECODE_INVAL)
9027

    
9028
    if not self.op.nics:
9029
      nics = []
9030
      for idx in range(constants.MAX_NICS):
9031
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9032
          ndict = {}
9033
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9034
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9035
            ndict[name] = v
9036
          nics.append(ndict)
9037
        else:
9038
          break
9039
      self.op.nics = nics
9040

    
9041
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9042
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9043

    
9044
    if (self.op.hypervisor is None and
9045
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
9046
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9047

    
9048
    if einfo.has_section(constants.INISECT_HYP):
9049
      # use the export parameters but do not override the ones
9050
      # specified by the user
9051
      for name, value in einfo.items(constants.INISECT_HYP):
9052
        if name not in self.op.hvparams:
9053
          self.op.hvparams[name] = value
9054

    
9055
    if einfo.has_section(constants.INISECT_BEP):
9056
      # use the parameters, without overriding
9057
      for name, value in einfo.items(constants.INISECT_BEP):
9058
        if name not in self.op.beparams:
9059
          self.op.beparams[name] = value
9060
        # Compatibility for the old "memory" be param
9061
        if name == constants.BE_MEMORY:
9062
          if constants.BE_MAXMEM not in self.op.beparams:
9063
            self.op.beparams[constants.BE_MAXMEM] = value
9064
          if constants.BE_MINMEM not in self.op.beparams:
9065
            self.op.beparams[constants.BE_MINMEM] = value
9066
    else:
9067
      # try to read the parameters old style, from the main section
9068
      for name in constants.BES_PARAMETERS:
9069
        if (name not in self.op.beparams and
9070
            einfo.has_option(constants.INISECT_INS, name)):
9071
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9072

    
9073
    if einfo.has_section(constants.INISECT_OSP):
9074
      # use the parameters, without overriding
9075
      for name, value in einfo.items(constants.INISECT_OSP):
9076
        if name not in self.op.osparams:
9077
          self.op.osparams[name] = value
9078

    
9079
  def _RevertToDefaults(self, cluster):
9080
    """Revert the instance parameters to the default values.
9081

9082
    """
9083
    # hvparams
9084
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9085
    for name in self.op.hvparams.keys():
9086
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9087
        del self.op.hvparams[name]
9088
    # beparams
9089
    be_defs = cluster.SimpleFillBE({})
9090
    for name in self.op.beparams.keys():
9091
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
9092
        del self.op.beparams[name]
9093
    # nic params
9094
    nic_defs = cluster.SimpleFillNIC({})
9095
    for nic in self.op.nics:
9096
      for name in constants.NICS_PARAMETERS:
9097
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9098
          del nic[name]
9099
    # osparams
9100
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9101
    for name in self.op.osparams.keys():
9102
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
9103
        del self.op.osparams[name]
9104

    
9105
  def _CalculateFileStorageDir(self):
9106
    """Calculate final instance file storage dir.
9107

9108
    """
9109
    # file storage dir calculation/check
9110
    self.instance_file_storage_dir = None
9111
    if self.op.disk_template in constants.DTS_FILEBASED:
9112
      # build the full file storage dir path
9113
      joinargs = []
9114

    
9115
      if self.op.disk_template == constants.DT_SHARED_FILE:
9116
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
9117
      else:
9118
        get_fsd_fn = self.cfg.GetFileStorageDir
9119

    
9120
      cfg_storagedir = get_fsd_fn()
9121
      if not cfg_storagedir:
9122
        raise errors.OpPrereqError("Cluster file storage dir not defined")
9123
      joinargs.append(cfg_storagedir)
9124

    
9125
      if self.op.file_storage_dir is not None:
9126
        joinargs.append(self.op.file_storage_dir)
9127

    
9128
      joinargs.append(self.op.instance_name)
9129

    
9130
      # pylint: disable=W0142
9131
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9132

    
9133
  def CheckPrereq(self):
9134
    """Check prerequisites.
9135

9136
    """
9137
    self._CalculateFileStorageDir()
9138

    
9139
    if self.op.mode == constants.INSTANCE_IMPORT:
9140
      export_info = self._ReadExportInfo()
9141
      self._ReadExportParams(export_info)
9142

    
9143
    if (not self.cfg.GetVGName() and
9144
        self.op.disk_template not in constants.DTS_NOT_LVM):
9145
      raise errors.OpPrereqError("Cluster does not support lvm-based"
9146
                                 " instances", errors.ECODE_STATE)
9147

    
9148
    if (self.op.hypervisor is None or
9149
        self.op.hypervisor == constants.VALUE_AUTO):
9150
      self.op.hypervisor = self.cfg.GetHypervisorType()
9151

    
9152
    cluster = self.cfg.GetClusterInfo()
9153
    enabled_hvs = cluster.enabled_hypervisors
9154
    if self.op.hypervisor not in enabled_hvs:
9155
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9156
                                 " cluster (%s)" % (self.op.hypervisor,
9157
                                  ",".join(enabled_hvs)),
9158
                                 errors.ECODE_STATE)
9159

    
9160
    # Check tag validity
9161
    for tag in self.op.tags:
9162
      objects.TaggableObject.ValidateTag(tag)
9163

    
9164
    # check hypervisor parameter syntax (locally)
9165
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9166
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9167
                                      self.op.hvparams)
9168
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9169
    hv_type.CheckParameterSyntax(filled_hvp)
9170
    self.hv_full = filled_hvp
9171
    # check that we don't specify global parameters on an instance
9172
    _CheckGlobalHvParams(self.op.hvparams)
9173

    
9174
    # fill and remember the beparams dict
9175
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
9176
    for param, value in self.op.beparams.iteritems():
9177
      if value == constants.VALUE_AUTO:
9178
        self.op.beparams[param] = default_beparams[param]
9179
    objects.UpgradeBeParams(self.op.beparams)
9180
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9181
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
9182

    
9183
    # build os parameters
9184
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9185

    
9186
    # now that hvp/bep are in final format, let's reset to defaults,
9187
    # if told to do so
9188
    if self.op.identify_defaults:
9189
      self._RevertToDefaults(cluster)
9190

    
9191
    # NIC buildup
9192
    self.nics = []
9193
    for idx, nic in enumerate(self.op.nics):
9194
      nic_mode_req = nic.get(constants.INIC_MODE, None)
9195
      nic_mode = nic_mode_req
9196
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9197
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9198

    
9199
      # in routed mode, for the first nic, the default ip is 'auto'
9200
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9201
        default_ip_mode = constants.VALUE_AUTO
9202
      else:
9203
        default_ip_mode = constants.VALUE_NONE
9204

    
9205
      # ip validity checks
9206
      ip = nic.get(constants.INIC_IP, default_ip_mode)
9207
      if ip is None or ip.lower() == constants.VALUE_NONE:
9208
        nic_ip = None
9209
      elif ip.lower() == constants.VALUE_AUTO:
9210
        if not self.op.name_check:
9211
          raise errors.OpPrereqError("IP address set to auto but name checks"
9212
                                     " have been skipped",
9213
                                     errors.ECODE_INVAL)
9214
        nic_ip = self.hostname1.ip
9215
      else:
9216
        if not netutils.IPAddress.IsValid(ip):
9217
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9218
                                     errors.ECODE_INVAL)
9219
        nic_ip = ip
9220

    
9221
      # TODO: check the ip address for uniqueness
9222
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9223
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
9224
                                   errors.ECODE_INVAL)
9225

    
9226
      # MAC address verification
9227
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9228
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9229
        mac = utils.NormalizeAndValidateMac(mac)
9230

    
9231
        try:
9232
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9233
        except errors.ReservationError:
9234
          raise errors.OpPrereqError("MAC address %s already in use"
9235
                                     " in cluster" % mac,
9236
                                     errors.ECODE_NOTUNIQUE)
9237

    
9238
      #  Build nic parameters
9239
      link = nic.get(constants.INIC_LINK, None)
9240
      if link == constants.VALUE_AUTO:
9241
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9242
      nicparams = {}
9243
      if nic_mode_req:
9244
        nicparams[constants.NIC_MODE] = nic_mode
9245
      if link:
9246
        nicparams[constants.NIC_LINK] = link
9247

    
9248
      check_params = cluster.SimpleFillNIC(nicparams)
9249
      objects.NIC.CheckParameterSyntax(check_params)
9250
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9251

    
9252
    # disk checks/pre-build
9253
    default_vg = self.cfg.GetVGName()
9254
    self.disks = []
9255
    for disk in self.op.disks:
9256
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9257
      if mode not in constants.DISK_ACCESS_SET:
9258
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9259
                                   mode, errors.ECODE_INVAL)
9260
      size = disk.get(constants.IDISK_SIZE, None)
9261
      if size is None:
9262
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9263
      try:
9264
        size = int(size)
9265
      except (TypeError, ValueError):
9266
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9267
                                   errors.ECODE_INVAL)
9268

    
9269
      data_vg = disk.get(constants.IDISK_VG, default_vg)
9270
      new_disk = {
9271
        constants.IDISK_SIZE: size,
9272
        constants.IDISK_MODE: mode,
9273
        constants.IDISK_VG: data_vg,
9274
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9275
        }
9276
      if constants.IDISK_ADOPT in disk:
9277
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9278
      self.disks.append(new_disk)
9279

    
9280
    if self.op.mode == constants.INSTANCE_IMPORT:
9281
      disk_images = []
9282
      for idx in range(len(self.disks)):
9283
        option = "disk%d_dump" % idx
9284
        if export_info.has_option(constants.INISECT_INS, option):
9285
          # FIXME: are the old os-es, disk sizes, etc. useful?
9286
          export_name = export_info.get(constants.INISECT_INS, option)
9287
          image = utils.PathJoin(self.op.src_path, export_name)
9288
          disk_images.append(image)
9289
        else:
9290
          disk_images.append(False)
9291

    
9292
      self.src_images = disk_images
9293

    
9294
      old_name = export_info.get(constants.INISECT_INS, "name")
9295
      if self.op.instance_name == old_name:
9296
        for idx, nic in enumerate(self.nics):
9297
          if nic.mac == constants.VALUE_AUTO:
9298
            nic_mac_ini = "nic%d_mac" % idx
9299
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9300

    
9301
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9302

    
9303
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
9304
    if self.op.ip_check:
9305
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9306
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
9307
                                   (self.check_ip, self.op.instance_name),
9308
                                   errors.ECODE_NOTUNIQUE)
9309

    
9310
    #### mac address generation
9311
    # By generating here the mac address both the allocator and the hooks get
9312
    # the real final mac address rather than the 'auto' or 'generate' value.
9313
    # There is a race condition between the generation and the instance object
9314
    # creation, which means that we know the mac is valid now, but we're not
9315
    # sure it will be when we actually add the instance. If things go bad
9316
    # adding the instance will abort because of a duplicate mac, and the
9317
    # creation job will fail.
9318
    for nic in self.nics:
9319
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9320
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9321

    
9322
    #### allocator run
9323

    
9324
    if self.op.iallocator is not None:
9325
      self._RunAllocator()
9326

    
9327
    # Release all unneeded node locks
9328
    _ReleaseLocks(self, locking.LEVEL_NODE,
9329
                  keep=filter(None, [self.op.pnode, self.op.snode,
9330
                                     self.op.src_node]))
9331

    
9332
    #### node related checks
9333

    
9334
    # check primary node
9335
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9336
    assert self.pnode is not None, \
9337
      "Cannot retrieve locked node %s" % self.op.pnode
9338
    if pnode.offline:
9339
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9340
                                 pnode.name, errors.ECODE_STATE)
9341
    if pnode.drained:
9342
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9343
                                 pnode.name, errors.ECODE_STATE)
9344
    if not pnode.vm_capable:
9345
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9346
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9347

    
9348
    self.secondaries = []
9349

    
9350
    # mirror node verification
9351
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9352
      if self.op.snode == pnode.name:
9353
        raise errors.OpPrereqError("The secondary node cannot be the"
9354
                                   " primary node", errors.ECODE_INVAL)
9355
      _CheckNodeOnline(self, self.op.snode)
9356
      _CheckNodeNotDrained(self, self.op.snode)
9357
      _CheckNodeVmCapable(self, self.op.snode)
9358
      self.secondaries.append(self.op.snode)
9359

    
9360
      snode = self.cfg.GetNodeInfo(self.op.snode)
9361
      if pnode.group != snode.group:
9362
        self.LogWarning("The primary and secondary nodes are in two"
9363
                        " different node groups; the disk parameters"
9364
                        " from the first disk's node group will be"
9365
                        " used")
9366

    
9367
    nodenames = [pnode.name] + self.secondaries
9368

    
9369
    # disk parameters (not customizable at instance or node level)
9370
    # just use the primary node parameters, ignoring the secondary.
9371
    self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9372

    
9373
    if not self.adopt_disks:
9374
      # Check lv size requirements, if not adopting
9375
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9376
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9377

    
9378
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9379
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9380
                                disk[constants.IDISK_ADOPT])
9381
                     for disk in self.disks])
9382
      if len(all_lvs) != len(self.disks):
9383
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9384
                                   errors.ECODE_INVAL)
9385
      for lv_name in all_lvs:
9386
        try:
9387
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9388
          # to ReserveLV uses the same syntax
9389
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9390
        except errors.ReservationError:
9391
          raise errors.OpPrereqError("LV named %s used by another instance" %
9392
                                     lv_name, errors.ECODE_NOTUNIQUE)
9393

    
9394
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9395
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9396

    
9397
      node_lvs = self.rpc.call_lv_list([pnode.name],
9398
                                       vg_names.payload.keys())[pnode.name]
9399
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9400
      node_lvs = node_lvs.payload
9401

    
9402
      delta = all_lvs.difference(node_lvs.keys())
9403
      if delta:
9404
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9405
                                   utils.CommaJoin(delta),
9406
                                   errors.ECODE_INVAL)
9407
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9408
      if online_lvs:
9409
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9410
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9411
                                   errors.ECODE_STATE)
9412
      # update the size of disk based on what is found
9413
      for dsk in self.disks:
9414
        dsk[constants.IDISK_SIZE] = \
9415
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9416
                                        dsk[constants.IDISK_ADOPT])][0]))
9417

    
9418
    elif self.op.disk_template == constants.DT_BLOCK:
9419
      # Normalize and de-duplicate device paths
9420
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9421
                       for disk in self.disks])
9422
      if len(all_disks) != len(self.disks):
9423
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9424
                                   errors.ECODE_INVAL)
9425
      baddisks = [d for d in all_disks
9426
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9427
      if baddisks:
9428
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9429
                                   " cannot be adopted" %
9430
                                   (", ".join(baddisks),
9431
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9432
                                   errors.ECODE_INVAL)
9433

    
9434
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9435
                                            list(all_disks))[pnode.name]
9436
      node_disks.Raise("Cannot get block device information from node %s" %
9437
                       pnode.name)
9438
      node_disks = node_disks.payload
9439
      delta = all_disks.difference(node_disks.keys())
9440
      if delta:
9441
        raise errors.OpPrereqError("Missing block device(s): %s" %
9442
                                   utils.CommaJoin(delta),
9443
                                   errors.ECODE_INVAL)
9444
      for dsk in self.disks:
9445
        dsk[constants.IDISK_SIZE] = \
9446
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9447

    
9448
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9449

    
9450
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9451
    # check OS parameters (remotely)
9452
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9453

    
9454
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9455

    
9456
    # memory check on primary node
9457
    #TODO(dynmem): use MINMEM for checking
9458
    if self.op.start:
9459
      _CheckNodeFreeMemory(self, self.pnode.name,
9460
                           "creating instance %s" % self.op.instance_name,
9461
                           self.be_full[constants.BE_MAXMEM],
9462
                           self.op.hypervisor)
9463

    
9464
    self.dry_run_result = list(nodenames)
9465

    
9466
  def Exec(self, feedback_fn):
9467
    """Create and add the instance to the cluster.
9468

9469
    """
9470
    instance = self.op.instance_name
9471
    pnode_name = self.pnode.name
9472

    
9473
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9474
                self.owned_locks(locking.LEVEL_NODE)), \
9475
      "Node locks differ from node resource locks"
9476

    
9477
    ht_kind = self.op.hypervisor
9478
    if ht_kind in constants.HTS_REQ_PORT:
9479
      network_port = self.cfg.AllocatePort()
9480
    else:
9481
      network_port = None
9482

    
9483
    disks = _GenerateDiskTemplate(self,
9484
                                  self.op.disk_template,
9485
                                  instance, pnode_name,
9486
                                  self.secondaries,
9487
                                  self.disks,
9488
                                  self.instance_file_storage_dir,
9489
                                  self.op.file_driver,
9490
                                  0,
9491
                                  feedback_fn,
9492
                                  self.diskparams)
9493

    
9494
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9495
                            primary_node=pnode_name,
9496
                            nics=self.nics, disks=disks,
9497
                            disk_template=self.op.disk_template,
9498
                            admin_state=constants.ADMINST_DOWN,
9499
                            network_port=network_port,
9500
                            beparams=self.op.beparams,
9501
                            hvparams=self.op.hvparams,
9502
                            hypervisor=self.op.hypervisor,
9503
                            osparams=self.op.osparams,
9504
                            )
9505

    
9506
    if self.op.tags:
9507
      for tag in self.op.tags:
9508
        iobj.AddTag(tag)
9509

    
9510
    if self.adopt_disks:
9511
      if self.op.disk_template == constants.DT_PLAIN:
9512
        # rename LVs to the newly-generated names; we need to construct
9513
        # 'fake' LV disks with the old data, plus the new unique_id
9514
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9515
        rename_to = []
9516
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9517
          rename_to.append(t_dsk.logical_id)
9518
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9519
          self.cfg.SetDiskID(t_dsk, pnode_name)
9520
        result = self.rpc.call_blockdev_rename(pnode_name,
9521
                                               zip(tmp_disks, rename_to))
9522
        result.Raise("Failed to rename adopted LVs")
9523
    else:
9524
      feedback_fn("* creating instance disks...")
9525
      try:
9526
        _CreateDisks(self, iobj)
9527
      except errors.OpExecError:
9528
        self.LogWarning("Device creation failed, reverting...")
9529
        try:
9530
          _RemoveDisks(self, iobj)
9531
        finally:
9532
          self.cfg.ReleaseDRBDMinors(instance)
9533
          raise
9534

    
9535
    feedback_fn("adding instance %s to cluster config" % instance)
9536

    
9537
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9538

    
9539
    # Declare that we don't want to remove the instance lock anymore, as we've
9540
    # added the instance to the config
9541
    del self.remove_locks[locking.LEVEL_INSTANCE]
9542

    
9543
    if self.op.mode == constants.INSTANCE_IMPORT:
9544
      # Release unused nodes
9545
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9546
    else:
9547
      # Release all nodes
9548
      _ReleaseLocks(self, locking.LEVEL_NODE)
9549

    
9550
    disk_abort = False
9551
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9552
      feedback_fn("* wiping instance disks...")
9553
      try:
9554
        _WipeDisks(self, iobj)
9555
      except errors.OpExecError, err:
9556
        logging.exception("Wiping disks failed")
9557
        self.LogWarning("Wiping instance disks failed (%s)", err)
9558
        disk_abort = True
9559

    
9560
    if disk_abort:
9561
      # Something is already wrong with the disks, don't do anything else
9562
      pass
9563
    elif self.op.wait_for_sync:
9564
      disk_abort = not _WaitForSync(self, iobj)
9565
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9566
      # make sure the disks are not degraded (still sync-ing is ok)
9567
      feedback_fn("* checking mirrors status")
9568
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9569
    else:
9570
      disk_abort = False
9571

    
9572
    if disk_abort:
9573
      _RemoveDisks(self, iobj)
9574
      self.cfg.RemoveInstance(iobj.name)
9575
      # Make sure the instance lock gets removed
9576
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9577
      raise errors.OpExecError("There are some degraded disks for"
9578
                               " this instance")
9579

    
9580
    # Release all node resource locks
9581
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9582

    
9583
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9584
      if self.op.mode == constants.INSTANCE_CREATE:
9585
        if not self.op.no_install:
9586
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9587
                        not self.op.wait_for_sync)
9588
          if pause_sync:
9589
            feedback_fn("* pausing disk sync to install instance OS")
9590
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9591
                                                              iobj.disks, True)
9592
            for idx, success in enumerate(result.payload):
9593
              if not success:
9594
                logging.warn("pause-sync of instance %s for disk %d failed",
9595
                             instance, idx)
9596

    
9597
          feedback_fn("* running the instance OS create scripts...")
9598
          # FIXME: pass debug option from opcode to backend
9599
          os_add_result = \
9600
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9601
                                          self.op.debug_level)
9602
          if pause_sync:
9603
            feedback_fn("* resuming disk sync")
9604
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9605
                                                              iobj.disks, False)
9606
            for idx, success in enumerate(result.payload):
9607
              if not success:
9608
                logging.warn("resume-sync of instance %s for disk %d failed",
9609
                             instance, idx)
9610

    
9611
          os_add_result.Raise("Could not add os for instance %s"
9612
                              " on node %s" % (instance, pnode_name))
9613

    
9614
      elif self.op.mode == constants.INSTANCE_IMPORT:
9615
        feedback_fn("* running the instance OS import scripts...")
9616

    
9617
        transfers = []
9618

    
9619
        for idx, image in enumerate(self.src_images):
9620
          if not image:
9621
            continue
9622

    
9623
          # FIXME: pass debug option from opcode to backend
9624
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9625
                                             constants.IEIO_FILE, (image, ),
9626
                                             constants.IEIO_SCRIPT,
9627
                                             (iobj.disks[idx], idx),
9628
                                             None)
9629
          transfers.append(dt)
9630

    
9631
        import_result = \
9632
          masterd.instance.TransferInstanceData(self, feedback_fn,
9633
                                                self.op.src_node, pnode_name,
9634
                                                self.pnode.secondary_ip,
9635
                                                iobj, transfers)
9636
        if not compat.all(import_result):
9637
          self.LogWarning("Some disks for instance %s on node %s were not"
9638
                          " imported successfully" % (instance, pnode_name))
9639

    
9640
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9641
        feedback_fn("* preparing remote import...")
9642
        # The source cluster will stop the instance before attempting to make a
9643
        # connection. In some cases stopping an instance can take a long time,
9644
        # hence the shutdown timeout is added to the connection timeout.
9645
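        # For example, with a 60s connection timeout and a 120s source
        # shutdown timeout the remote end is given 180s to show up
        # (illustrative numbers only).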
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

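  # The serialized console object typically describes either a command to run
  # on the node (e.g. an SSH invocation for a PV console) or a host/port to
  # connect to (e.g. VNC); the exact fields depend on the console kind.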
  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
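    # Accepted combinations, as enforced below:
    #   REPLACE_DISK_CHG: exactly one of remote_node / iallocator must be set
    #   all other modes:  neither remote_node nor iallocator may be set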
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

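    # With delay_iallocator set, the second half of the checks runs from Exec
    # instead (see _CheckPrereq2), so that the iallocator sees the updated
    # cluster model during node evacuation.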
    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    # TODO: compute disk parameters
    primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
    secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
    if primary_node_info.group != secondary_node_info.group:
      self.lu.LogInfo("The instance primary and secondary nodes are in two"
                      " different node groups; the disk parameters of the"
                      " primary node's group will be applied.")

    self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
              self.lu.owned_locks(locking.LEVEL_NODE_RES))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]), params=data_p)
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]), params=meta_p)

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

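    # iv_names maps each DRBD device's iv_name (e.g. "disk/0") to a tuple of
    # (drbd disk object, copies of the old LV children, newly created LVs);
    # it drives the detach/rename/attach steps of the replacement.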
    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

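      # Illustrative example with hypothetical names: the old data LV
      # "xenvg/<uuid>.disk0_data" is first renamed to
      # "xenvg/<uuid>.disk0_data_replaced-<timestamp>", and the freshly
      # created LV then takes over the original name.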
      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size,
                              params=drbd_params)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

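    # Only storage types for which the "fix consistency" operation is defined
    # (e.g. LVM volume groups) can be repaired through this opcode.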
    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

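  # Map each node evacuation mode to the corresponding iallocator
  # node-evacuate mode; the asserts below keep the two constant sets in sync.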
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10857
    else:
10858
      self.LogInfo("No instances to evacuate from node '%s'",
10859
                   self.op.node_name)
10860

    
10861
    if self.op.remote_node is not None:
10862
      for i in self.instances:
10863
        if i.primary_node == self.op.remote_node:
10864
          raise errors.OpPrereqError("Node %s is the primary node of"
10865
                                     " instance %s, cannot use it as"
10866
                                     " secondary" %
10867
                                     (self.op.remote_node, i.name),
10868
                                     errors.ECODE_INVAL)
10869

    
10870
  def Exec(self, feedback_fn):
10871
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10872

    
10873
    if not self.instance_names:
10874
      # No instances to evacuate
10875
      jobs = []
10876

    
10877
    elif self.op.iallocator is not None:
10878
      # TODO: Implement relocation to other group
10879
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10880
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10881
                       instances=list(self.instance_names))
10882

    
10883
      ial.Run(self.op.iallocator)
10884

    
10885
      if not ial.success:
10886
        raise errors.OpPrereqError("Can't compute node evacuation using"
10887
                                   " iallocator '%s': %s" %
10888
                                   (self.op.iallocator, ial.info),
10889
                                   errors.ECODE_NORES)
10890

    
10891
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10892

    
10893
    elif self.op.remote_node is not None:
10894
      assert self.op.mode == constants.NODE_EVAC_SEC
10895
      jobs = [
10896
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10897
                                        remote_node=self.op.remote_node,
10898
                                        disks=[],
10899
                                        mode=constants.REPLACE_DISK_CHG,
10900
                                        early_release=self.op.early_release)]
10901
        for instance_name in self.instance_names
10902
        ]
10903

    
10904
    else:
10905
      raise errors.ProgrammerError("No iallocator or remote node")
10906

    
10907
    return ResultWithJobs(jobs)
10908

    
10909

    
10910
def _SetOpEarlyRelease(early_release, op):
10911
  """Sets C{early_release} flag on opcodes if available.
10912

10913
  """
10914
  try:
10915
    op.early_release = early_release
10916
  except AttributeError:
10917
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10918

    
10919
  return op
10920

    
10921

    
10922
def _NodeEvacDest(use_nodes, group, nodes):
10923
  """Returns group or nodes depending on caller's choice.
10924

10925
  """
10926
  if use_nodes:
10927
    return utils.CommaJoin(nodes)
10928
  else:
10929
    return group
10930

    
10931

    
10932
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10933
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10934

10935
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10936
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10937

10938
  @type lu: L{LogicalUnit}
10939
  @param lu: Logical unit instance
10940
  @type alloc_result: tuple/list
10941
  @param alloc_result: Result from iallocator
10942
  @type early_release: bool
10943
  @param early_release: Whether to release locks early if possible
10944
  @type use_nodes: bool
10945
  @param use_nodes: Whether to display node names instead of groups
10946

10947
  """
10948
  (moved, failed, jobs) = alloc_result
10949

    
10950
  if failed:
10951
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10952
                                 for (name, reason) in failed)
10953
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10954
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10955

    
10956
  if moved:
10957
    lu.LogInfo("Instances to be moved: %s",
10958
               utils.CommaJoin("%s (to %s)" %
10959
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10960
                               for (name, group, nodes) in moved))
10961

    
10962
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10963
              map(opcodes.OpCode.LoadOpCode, ops))
10964
          for ops in jobs]
10965

    
10966

    
10967
class LUInstanceGrowDisk(LogicalUnit):
10968
  """Grow a disk of an instance.
10969

10970
  """
10971
  HPATH = "disk-grow"
10972
  HTYPE = constants.HTYPE_INSTANCE
10973
  REQ_BGL = False
10974

    
10975
  def ExpandNames(self):
10976
    self._ExpandAndLockInstance()
10977
    self.needed_locks[locking.LEVEL_NODE] = []
10978
    self.needed_locks[locking.LEVEL_NODE_RES] = []
10979
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10980

    
10981
  def DeclareLocks(self, level):
10982
    if level == locking.LEVEL_NODE:
10983
      self._LockInstancesNodes()
10984
    elif level == locking.LEVEL_NODE_RES:
10985
      # Copy node locks
10986
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10987
        self.needed_locks[locking.LEVEL_NODE][:]
10988

    
10989
  def BuildHooksEnv(self):
10990
    """Build hooks env.
10991

10992
    This runs on the master, the primary and all the secondaries.
10993

10994
    """
10995
    env = {
10996
      "DISK": self.op.disk,
10997
      "AMOUNT": self.op.amount,
10998
      }
10999
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11000
    return env
11001

    
11002
  def BuildHooksNodes(self):
11003
    """Build hooks nodes.
11004

11005
    """
11006
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11007
    return (nl, nl)
11008

    
11009
  def CheckPrereq(self):
11010
    """Check prerequisites.
11011

11012
    This checks that the instance is in the cluster.
11013

11014
    """
11015
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11016
    assert instance is not None, \
11017
      "Cannot retrieve locked instance %s" % self.op.instance_name
11018
    nodenames = list(instance.all_nodes)
11019
    for node in nodenames:
11020
      _CheckNodeOnline(self, node)
11021

    
11022
    self.instance = instance
11023

    
11024
    if instance.disk_template not in constants.DTS_GROWABLE:
11025
      raise errors.OpPrereqError("Instance's disk layout does not support"
11026
                                 " growing", errors.ECODE_INVAL)
11027

    
11028
    self.disk = instance.FindDisk(self.op.disk)
11029

    
11030
    if instance.disk_template not in (constants.DT_FILE,
11031
                                      constants.DT_SHARED_FILE):
11032
      # TODO: check the free disk space for file, when that feature will be
11033
      # supported
11034
      _CheckNodesFreeDiskPerVG(self, nodenames,
11035
                               self.disk.ComputeGrowth(self.op.amount))
11036

    
11037
  def Exec(self, feedback_fn):
11038
    """Execute disk grow.
11039

11040
    """
11041
    instance = self.instance
11042
    disk = self.disk
11043

    
11044
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11045
    assert (self.owned_locks(locking.LEVEL_NODE) ==
11046
            self.owned_locks(locking.LEVEL_NODE_RES))
11047

    
11048
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11049
    if not disks_ok:
11050
      raise errors.OpExecError("Cannot activate block device to grow")
11051

    
11052
    feedback_fn("Growing disk %s of instance '%s' by %s" %
11053
                (self.op.disk, instance.name,
11054
                 utils.FormatUnit(self.op.amount, "h")))
11055

    
11056
    # First run all grow ops in dry-run mode
11057
    for node in instance.all_nodes:
11058
      self.cfg.SetDiskID(disk, node)
11059
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11060
      result.Raise("Grow request failed to node %s" % node)
11061

    
11062
    # We know that (as far as we can test) operations across different
11063
    # nodes will succeed, time to run it for real
11064
    for node in instance.all_nodes:
11065
      self.cfg.SetDiskID(disk, node)
11066
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11067
      result.Raise("Grow request failed to node %s" % node)
11068

    
11069
      # TODO: Rewrite code to work properly
11070
      # DRBD goes into sync mode for a short amount of time after executing the
11071
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11072
      # calling "resize" in sync mode fails. Sleeping for a short amount of
11073
      # time is a work-around.
11074
      time.sleep(5)
11075

    
11076
    disk.RecordGrow(self.op.amount)
11077
    self.cfg.Update(instance, feedback_fn)
11078

    
11079
    # Changes have been recorded, release node lock
11080
    _ReleaseLocks(self, locking.LEVEL_NODE)
11081

    
11082
    # Downgrade lock while waiting for sync
11083
    self.glm.downgrade(locking.LEVEL_INSTANCE)
11084

    
11085
    if self.op.wait_for_sync:
11086
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
11087
      if disk_abort:
11088
        self.proc.LogWarning("Disk sync-ing has not returned a good"
11089
                             " status; please check the instance")
11090
      if instance.admin_state != constants.ADMINST_UP:
11091
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11092
    elif instance.admin_state != constants.ADMINST_UP:
11093
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
11094
                           " not supposed to be running because no wait for"
11095
                           " sync mode was requested")
11096

    
11097
    assert self.owned_locks(locking.LEVEL_NODE_RES)
11098
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11099

    
11100

    
11101
class LUInstanceQueryData(NoHooksLU):
11102
  """Query runtime instance data.
11103

11104
  """
11105
  REQ_BGL = False
11106

    
11107
  def ExpandNames(self):
11108
    self.needed_locks = {}
11109

    
11110
    # Use locking if requested or when non-static information is wanted
11111
    if not (self.op.static or self.op.use_locking):
11112
      self.LogWarning("Non-static data requested, locks need to be acquired")
11113
      self.op.use_locking = True
11114

    
11115
    if self.op.instances or not self.op.use_locking:
11116
      # Expand instance names right here
11117
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
11118
    else:
11119
      # Will use acquired locks
11120
      self.wanted_names = None
11121

    
11122
    if self.op.use_locking:
11123
      self.share_locks = _ShareAll()
11124

    
11125
      if self.wanted_names is None:
11126
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11127
      else:
11128
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11129

    
11130
      self.needed_locks[locking.LEVEL_NODE] = []
11131
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11132

    
11133
  def DeclareLocks(self, level):
11134
    if self.op.use_locking and level == locking.LEVEL_NODE:
11135
      self._LockInstancesNodes()
11136

    
11137
  def CheckPrereq(self):
11138
    """Check prerequisites.
11139

11140
    This only checks the optional instance list against the existing names.
11141

11142
    """
11143
    if self.wanted_names is None:
11144
      assert self.op.use_locking, "Locking was not used"
11145
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11146

    
11147
    self.wanted_instances = \
11148
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11149

    
11150
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
11151
    """Returns the status of a block device
11152

11153
    """
11154
    if self.op.static or not node:
11155
      return None
11156

    
11157
    self.cfg.SetDiskID(dev, node)
11158

    
11159
    result = self.rpc.call_blockdev_find(node, dev)
11160
    if result.offline:
11161
      return None
11162

    
11163
    result.Raise("Can't compute disk status for %s" % instance_name)
11164

    
11165
    status = result.payload
11166
    if status is None:
11167
      return None
11168

    
11169
    return (status.dev_path, status.major, status.minor,
11170
            status.sync_percent, status.estimated_time,
11171
            status.is_degraded, status.ldisk_status)
11172

    
11173
  def _ComputeDiskStatus(self, instance, snode, dev):
11174
    """Compute block device status.
11175

11176
    """
11177
    if dev.dev_type in constants.LDS_DRBD:
11178
      # we change the snode then (otherwise we use the one passed in)
11179
      if dev.logical_id[0] == instance.primary_node:
11180
        snode = dev.logical_id[1]
11181
      else:
11182
        snode = dev.logical_id[0]
11183

    
11184
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11185
                                              instance.name, dev)
11186
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11187

    
11188
    if dev.children:
11189
      dev_children = map(compat.partial(self._ComputeDiskStatus,
11190
                                        instance, snode),
11191
                         dev.children)
11192
    else:
11193
      dev_children = []
11194

    
11195
    return {
11196
      "iv_name": dev.iv_name,
11197
      "dev_type": dev.dev_type,
11198
      "logical_id": dev.logical_id,
11199
      "physical_id": dev.physical_id,
11200
      "pstatus": dev_pstatus,
11201
      "sstatus": dev_sstatus,
11202
      "children": dev_children,
11203
      "mode": dev.mode,
11204
      "size": dev.size,
11205
      }
11206

    
11207
  def Exec(self, feedback_fn):
11208
    """Gather and return data"""
11209
    result = {}
11210

    
11211
    cluster = self.cfg.GetClusterInfo()
11212

    
11213
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11214
                                          for i in self.wanted_instances)
11215
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11216
      if self.op.static or pnode.offline:
11217
        remote_state = None
11218
        if pnode.offline:
11219
          self.LogWarning("Primary node %s is marked offline, returning static"
11220
                          " information only for instance %s" %
11221
                          (pnode.name, instance.name))
11222
      else:
11223
        remote_info = self.rpc.call_instance_info(instance.primary_node,
11224
                                                  instance.name,
11225
                                                  instance.hypervisor)
11226
        remote_info.Raise("Error checking node %s" % instance.primary_node)
11227
        remote_info = remote_info.payload
11228
        if remote_info and "state" in remote_info:
11229
          remote_state = "up"
11230
        else:
11231
          if instance.admin_state == constants.ADMINST_UP:
11232
            remote_state = "down"
11233
          else:
11234
            remote_state = instance.admin_state
11235

    
11236
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11237
                  instance.disks)
11238

    
11239
      result[instance.name] = {
11240
        "name": instance.name,
11241
        "config_state": instance.admin_state,
11242
        "run_state": remote_state,
11243
        "pnode": instance.primary_node,
11244
        "snodes": instance.secondary_nodes,
11245
        "os": instance.os,
11246
        # this happens to be the same format used for hooks
11247
        "nics": _NICListToTuple(self, instance.nics),
11248
        "disk_template": instance.disk_template,
11249
        "disks": disks,
11250
        "hypervisor": instance.hypervisor,
11251
        "network_port": instance.network_port,
11252
        "hv_instance": instance.hvparams,
11253
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
11254
        "be_instance": instance.beparams,
11255
        "be_actual": cluster.FillBE(instance),
11256
        "os_instance": instance.osparams,
11257
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11258
        "serial_no": instance.serial_no,
11259
        "mtime": instance.mtime,
11260
        "ctime": instance.ctime,
11261
        "uuid": instance.uuid,
11262
        }
11263

    
11264
    return result
11265

    
11266

    
11267
class LUInstanceSetParams(LogicalUnit):
11268
  """Modifies an instances's parameters.
11269

11270
  """
11271
  HPATH = "instance-modify"
11272
  HTYPE = constants.HTYPE_INSTANCE
11273
  REQ_BGL = False
11274

    
11275
  def CheckArguments(self):
11276
    if not (self.op.nics or self.op.disks or self.op.disk_template or
11277
            self.op.hvparams or self.op.beparams or self.op.os_name or
11278
            self.op.online_inst or self.op.offline_inst):
11279
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11280

    
11281
    if self.op.hvparams:
11282
      _CheckGlobalHvParams(self.op.hvparams)
11283

    
11284
    # Disk validation
11285
    disk_addremove = 0
11286
    for disk_op, disk_dict in self.op.disks:
11287
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11288
      if disk_op == constants.DDM_REMOVE:
11289
        disk_addremove += 1
11290
        continue
11291
      elif disk_op == constants.DDM_ADD:
11292
        disk_addremove += 1
11293
      else:
11294
        if not isinstance(disk_op, int):
11295
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11296
        if not isinstance(disk_dict, dict):
11297
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11298
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11299

    
11300
      if disk_op == constants.DDM_ADD:
11301
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11302
        if mode not in constants.DISK_ACCESS_SET:
11303
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11304
                                     errors.ECODE_INVAL)
11305
        size = disk_dict.get(constants.IDISK_SIZE, None)
11306
        if size is None:
11307
          raise errors.OpPrereqError("Required disk parameter size missing",
11308
                                     errors.ECODE_INVAL)
11309
        try:
11310
          size = int(size)
11311
        except (TypeError, ValueError), err:
11312
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11313
                                     str(err), errors.ECODE_INVAL)
11314
        disk_dict[constants.IDISK_SIZE] = size
11315
      else:
11316
        # modification of disk
11317
        if constants.IDISK_SIZE in disk_dict:
11318
          raise errors.OpPrereqError("Disk size change not possible, use"
11319
                                     " grow-disk", errors.ECODE_INVAL)
11320

    
11321
    if disk_addremove > 1:
11322
      raise errors.OpPrereqError("Only one disk add or remove operation"
11323
                                 " supported at a time", errors.ECODE_INVAL)
11324

    
11325
    if self.op.disks and self.op.disk_template is not None:
11326
      raise errors.OpPrereqError("Disk template conversion and other disk"
11327
                                 " changes not supported at the same time",
11328
                                 errors.ECODE_INVAL)
11329

    
11330
    if (self.op.disk_template and
11331
        self.op.disk_template in constants.DTS_INT_MIRROR and
11332
        self.op.remote_node is None):
11333
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
11334
                                 " one requires specifying a secondary node",
11335
                                 errors.ECODE_INVAL)
11336

    
11337
    # NIC validation
11338
    nic_addremove = 0
11339
    for nic_op, nic_dict in self.op.nics:
11340
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11341
      if nic_op == constants.DDM_REMOVE:
11342
        nic_addremove += 1
11343
        continue
11344
      elif nic_op == constants.DDM_ADD:
11345
        nic_addremove += 1
11346
      else:
11347
        if not isinstance(nic_op, int):
11348
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11349
        if not isinstance(nic_dict, dict):
11350
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11351
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11352

    
11353
      # nic_dict should be a dict
11354
      nic_ip = nic_dict.get(constants.INIC_IP, None)
11355
      if nic_ip is not None:
11356
        if nic_ip.lower() == constants.VALUE_NONE:
11357
          nic_dict[constants.INIC_IP] = None
11358
        else:
11359
          if not netutils.IPAddress.IsValid(nic_ip):
11360
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11361
                                       errors.ECODE_INVAL)
11362

    
11363
      nic_bridge = nic_dict.get("bridge", None)
11364
      nic_link = nic_dict.get(constants.INIC_LINK, None)
11365
      if nic_bridge and nic_link:
11366
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11367
                                   " at the same time", errors.ECODE_INVAL)
11368
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11369
        nic_dict["bridge"] = None
11370
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11371
        nic_dict[constants.INIC_LINK] = None
11372

    
11373
      if nic_op == constants.DDM_ADD:
11374
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
11375
        if nic_mac is None:
11376
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11377

    
11378
      if constants.INIC_MAC in nic_dict:
11379
        nic_mac = nic_dict[constants.INIC_MAC]
11380
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11381
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11382

    
11383
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11384
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11385
                                     " modifying an existing nic",
11386
                                     errors.ECODE_INVAL)
11387

    
11388
    if nic_addremove > 1:
11389
      raise errors.OpPrereqError("Only one NIC add or remove operation"
11390
                                 " supported at a time", errors.ECODE_INVAL)
11391

    
11392
  def ExpandNames(self):
11393
    self._ExpandAndLockInstance()
11394
    # Can't even acquire node locks in shared mode as upcoming changes in
11395
    # Ganeti 2.6 will start to modify the node object on disk conversion
11396
    self.needed_locks[locking.LEVEL_NODE] = []
11397
    self.needed_locks[locking.LEVEL_NODE_RES] = []
11398
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11399

    
11400
  def DeclareLocks(self, level):
11401
    if level == locking.LEVEL_NODE:
11402
      self._LockInstancesNodes()
11403
      if self.op.disk_template and self.op.remote_node:
11404
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11405
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11406
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11407
      # Copy node locks
11408
      self.needed_locks[locking.LEVEL_NODE_RES] = \
11409
        self.needed_locks[locking.LEVEL_NODE][:]
11410

    
11411
  def BuildHooksEnv(self):
11412
    """Build hooks env.
11413

11414
    This runs on the master, primary and secondaries.
11415

11416
    """
11417
    args = dict()
11418
    if constants.BE_MINMEM in self.be_new:
11419
      args["minmem"] = self.be_new[constants.BE_MINMEM]
11420
    if constants.BE_MAXMEM in self.be_new:
11421
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11422
    if constants.BE_VCPUS in self.be_new:
11423
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
11424
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11425
    # information at all.
11426
    if self.op.nics:
11427
      args["nics"] = []
11428
      nic_override = dict(self.op.nics)
11429
      for idx, nic in enumerate(self.instance.nics):
11430
        if idx in nic_override:
11431
          this_nic_override = nic_override[idx]
11432
        else:
11433
          this_nic_override = {}
11434
        if constants.INIC_IP in this_nic_override:
11435
          ip = this_nic_override[constants.INIC_IP]
11436
        else:
11437
          ip = nic.ip
11438
        if constants.INIC_MAC in this_nic_override:
11439
          mac = this_nic_override[constants.INIC_MAC]
11440
        else:
11441
          mac = nic.mac
11442
        if idx in self.nic_pnew:
11443
          nicparams = self.nic_pnew[idx]
11444
        else:
11445
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11446
        mode = nicparams[constants.NIC_MODE]
11447
        link = nicparams[constants.NIC_LINK]
11448
        args["nics"].append((ip, mac, mode, link))
11449
      if constants.DDM_ADD in nic_override:
11450
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11451
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11452
        nicparams = self.nic_pnew[constants.DDM_ADD]
11453
        mode = nicparams[constants.NIC_MODE]
11454
        link = nicparams[constants.NIC_LINK]
11455
        args["nics"].append((ip, mac, mode, link))
11456
      elif constants.DDM_REMOVE in nic_override:
11457
        del args["nics"][-1]
11458

    
11459
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11460
    if self.op.disk_template:
11461
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11462

    
11463
    return env
11464

    
11465
  def BuildHooksNodes(self):
11466
    """Build hooks nodes.
11467

11468
    """
11469
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11470
    return (nl, nl)
11471

    
11472
  def CheckPrereq(self):
11473
    """Check prerequisites.
11474

11475
    This only checks the instance list against the existing names.
11476

11477
    """
11478
    # checking the new params on the primary/secondary nodes
11479

    
11480
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11481
    cluster = self.cluster = self.cfg.GetClusterInfo()
11482
    assert self.instance is not None, \
11483
      "Cannot retrieve locked instance %s" % self.op.instance_name
11484
    pnode = instance.primary_node
11485
    nodelist = list(instance.all_nodes)
11486
    pnode_info = self.cfg.GetNodeInfo(pnode)
11487
    self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11488

    
11489
    # OS change
11490
    if self.op.os_name and not self.op.force:
11491
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11492
                      self.op.force_variant)
11493
      instance_os = self.op.os_name
11494
    else:
11495
      instance_os = instance.os
11496

    
11497
    if self.op.disk_template:
11498
      if instance.disk_template == self.op.disk_template:
11499
        raise errors.OpPrereqError("Instance already has disk template %s" %
11500
                                   instance.disk_template, errors.ECODE_INVAL)
11501

    
11502
      if (instance.disk_template,
11503
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11504
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11505
                                   " %s to %s" % (instance.disk_template,
11506
                                                  self.op.disk_template),
11507
                                   errors.ECODE_INVAL)
11508
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11509
                          msg="cannot change disk template")
11510
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11511
        if self.op.remote_node == pnode:
11512
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11513
                                     " as the primary node of the instance" %
11514
                                     self.op.remote_node, errors.ECODE_STATE)
11515
        _CheckNodeOnline(self, self.op.remote_node)
11516
        _CheckNodeNotDrained(self, self.op.remote_node)
11517
        # FIXME: here we assume that the old instance type is DT_PLAIN
11518
        assert instance.disk_template == constants.DT_PLAIN
11519
        disks = [{constants.IDISK_SIZE: d.size,
11520
                  constants.IDISK_VG: d.logical_id[0]}
11521
                 for d in instance.disks]
11522
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11523
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11524

    
11525
        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11526
        if pnode_info.group != snode_info.group:
11527
          self.LogWarning("The primary and secondary nodes are in two"
11528
                          " different node groups; the disk parameters"
11529
                          " from the first disk's node group will be"
11530
                          " used")
11531

    
11532
    # hvparams processing
11533
    if self.op.hvparams:
11534
      hv_type = instance.hypervisor
11535
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11536
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11537
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11538

    
11539
      # local check
11540
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11541
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11542
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11543
      self.hv_inst = i_hvdict # the new dict (without defaults)
11544
    else:
11545
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11546
                                              instance.hvparams)
11547
      self.hv_new = self.hv_inst = {}
11548

    
11549
    # beparams processing
11550
    if self.op.beparams:
11551
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11552
                                   use_none=True)
11553
      objects.UpgradeBeParams(i_bedict)
11554
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11555
      be_new = cluster.SimpleFillBE(i_bedict)
11556
      self.be_proposed = self.be_new = be_new # the new actual values
11557
      self.be_inst = i_bedict # the new dict (without defaults)
11558
    else:
11559
      self.be_new = self.be_inst = {}
11560
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11561
    be_old = cluster.FillBE(instance)
11562

    
11563
    # CPU param validation -- checking every time a paramtere is
11564
    # changed to cover all cases where either CPU mask or vcpus have
11565
    # changed
11566
    if (constants.BE_VCPUS in self.be_proposed and
11567
        constants.HV_CPU_MASK in self.hv_proposed):
11568
      cpu_list = \
11569
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11570
      # Verify mask is consistent with number of vCPUs. Can skip this
11571
      # test if only 1 entry in the CPU mask, which means same mask
11572
      # is applied to all vCPUs.
11573
      if (len(cpu_list) > 1 and
11574
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11575
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11576
                                   " CPU mask [%s]" %
11577
                                   (self.be_proposed[constants.BE_VCPUS],
11578
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11579
                                   errors.ECODE_INVAL)
11580

    
11581
      # Only perform this test if a new CPU mask is given
11582
      if constants.HV_CPU_MASK in self.hv_new:
11583
        # Calculate the largest CPU number requested
11584
        max_requested_cpu = max(map(max, cpu_list))
11585
        # Check that all of the instance's nodes have enough physical CPUs to
11586
        # satisfy the requested CPU mask
11587
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11588
                                max_requested_cpu + 1, instance.hypervisor)
11589

    
11590
    # osparams processing
11591
    if self.op.osparams:
11592
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11593
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11594
      self.os_inst = i_osdict # the new dict (without defaults)
11595
    else:
11596
      self.os_inst = {}
11597

    
11598
    self.warn = []
11599

    
11600
    #TODO(dynmem): do the appropriate check involving MINMEM
11601
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11602
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11603
      mem_check_list = [pnode]
11604
      if be_new[constants.BE_AUTO_BALANCE]:
11605
        # either we changed auto_balance to yes or it was from before
11606
        mem_check_list.extend(instance.secondary_nodes)
11607
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11608
                                                  instance.hypervisor)
11609
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11610
                                         [instance.hypervisor])
11611
      pninfo = nodeinfo[pnode]
11612
      msg = pninfo.fail_msg
11613
      if msg:
11614
        # Assume the primary node is unreachable and go ahead
11615
        self.warn.append("Can't get info from primary node %s: %s" %
11616
                         (pnode, msg))
11617
      else:
11618
        (_, _, (pnhvinfo, )) = pninfo.payload
11619
        if not isinstance(pnhvinfo.get("memory_free", None), int):
11620
          self.warn.append("Node data from primary node %s doesn't contain"
11621
                           " free memory information" % pnode)
11622
        elif instance_info.fail_msg:
11623
          self.warn.append("Can't get instance runtime information: %s" %
11624
                          instance_info.fail_msg)
11625
        else:
11626
          if instance_info.payload:
11627
            current_mem = int(instance_info.payload["memory"])
11628
          else:
11629
            # Assume instance not running
11630
            # (there is a slight race condition here, but it's not very
11631
            # probable, and we have no other way to check)
11632
            # TODO: Describe race condition
11633
            current_mem = 0
11634
          #TODO(dynmem): do the appropriate check involving MINMEM
11635
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11636
                      pnhvinfo["memory_free"])
11637
          if miss_mem > 0:
11638
            raise errors.OpPrereqError("This change will prevent the instance"
11639
                                       " from starting, due to %d MB of memory"
11640
                                       " missing on its primary node" %
11641
                                       miss_mem,
11642
                                       errors.ECODE_NORES)
11643

    
11644
      if be_new[constants.BE_AUTO_BALANCE]:
11645
        for node, nres in nodeinfo.items():
11646
          if node not in instance.secondary_nodes:
11647
            continue
11648
          nres.Raise("Can't get info from secondary node %s" % node,
11649
                     prereq=True, ecode=errors.ECODE_STATE)
11650
          (_, _, (nhvinfo, )) = nres.payload
11651
          if not isinstance(nhvinfo.get("memory_free", None), int):
11652
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11653
                                       " memory information" % node,
11654
                                       errors.ECODE_STATE)
11655
          #TODO(dynmem): do the appropriate check involving MINMEM
11656
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11657
            raise errors.OpPrereqError("This change will prevent the instance"
11658
                                       " from failover to its secondary node"
11659
                                       " %s, due to not enough memory" % node,
11660
                                       errors.ECODE_STATE)
11661

    
11662
    # NIC processing
11663
    self.nic_pnew = {}
11664
    self.nic_pinst = {}
11665
    for nic_op, nic_dict in self.op.nics:
11666
      if nic_op == constants.DDM_REMOVE:
11667
        if not instance.nics:
11668
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11669
                                     errors.ECODE_INVAL)
11670
        continue
11671
      if nic_op != constants.DDM_ADD:
11672
        # an existing nic
11673
        if not instance.nics:
11674
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11675
                                     " no NICs" % nic_op,
11676
                                     errors.ECODE_INVAL)
11677
        if nic_op < 0 or nic_op >= len(instance.nics):
11678
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11679
                                     " are 0 to %d" %
11680
                                     (nic_op, len(instance.nics) - 1),
11681
                                     errors.ECODE_INVAL)
11682
        old_nic_params = instance.nics[nic_op].nicparams
11683
        old_nic_ip = instance.nics[nic_op].ip
11684
      else:
11685
        old_nic_params = {}
11686
        old_nic_ip = None
11687

    
11688
      update_params_dict = dict([(key, nic_dict[key])
11689
                                 for key in constants.NICS_PARAMETERS
11690
                                 if key in nic_dict])
11691

    
11692
      if "bridge" in nic_dict:
11693
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11694

    
11695
      new_nic_params = _GetUpdatedParams(old_nic_params,
11696
                                         update_params_dict)
11697
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11698
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11699
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11700
      self.nic_pinst[nic_op] = new_nic_params
11701
      self.nic_pnew[nic_op] = new_filled_nic_params
11702
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11703

    
11704
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11705
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11706
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11707
        if msg:
11708
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11709
          if self.op.force:
11710
            self.warn.append(msg)
11711
          else:
11712
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11713
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11714
        if constants.INIC_IP in nic_dict:
11715
          nic_ip = nic_dict[constants.INIC_IP]
11716
        else:
11717
          nic_ip = old_nic_ip
11718
        if nic_ip is None:
11719
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11720
                                     " on a routed nic", errors.ECODE_INVAL)
11721
      if constants.INIC_MAC in nic_dict:
11722
        nic_mac = nic_dict[constants.INIC_MAC]
11723
        if nic_mac is None:
11724
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11725
                                     errors.ECODE_INVAL)
11726
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11727
          # otherwise generate the mac
11728
          nic_dict[constants.INIC_MAC] = \
11729
            self.cfg.GenerateMAC(self.proc.GetECId())
11730
        else:
11731
          # or validate/reserve the current one
11732
          try:
11733
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11734
          except errors.ReservationError:
11735
            raise errors.OpPrereqError("MAC address %s already in use"
11736
                                       " in cluster" % nic_mac,
11737
                                       errors.ECODE_NOTUNIQUE)
11738

    
11739
    # DISK processing
11740
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11741
      raise errors.OpPrereqError("Disk operations not supported for"
11742
                                 " diskless instances",
11743
                                 errors.ECODE_INVAL)
11744
    for disk_op, _ in self.op.disks:
11745
      if disk_op == constants.DDM_REMOVE:
11746
        if len(instance.disks) == 1:
11747
          raise errors.OpPrereqError("Cannot remove the last disk of"
11748
                                     " an instance", errors.ECODE_INVAL)
11749
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
11750
                            msg="cannot remove disks")
11751

    
11752
      if (disk_op == constants.DDM_ADD and
11753
          len(instance.disks) >= constants.MAX_DISKS):
11754
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11755
                                   " add more" % constants.MAX_DISKS,
11756
                                   errors.ECODE_STATE)
11757
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11758
        # an existing disk
11759
        if disk_op < 0 or disk_op >= len(instance.disks):
11760
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11761
                                     " are 0 to %d" %
11762
                                     (disk_op, len(instance.disks)),
11763
                                     errors.ECODE_INVAL)
11764

    
11765
    # disabling the instance
11766
    if self.op.offline_inst:
11767
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11768
                          msg="cannot change instance state to offline")
11769

    
11770
    # enabling the instance
11771
    if self.op.online_inst:
11772
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11773
                          msg="cannot make instance go online")
11774

    
11775
  def _ConvertPlainToDrbd(self, feedback_fn):
11776
    """Converts an instance from plain to drbd.
11777

11778
    """
11779
    feedback_fn("Converting template to drbd")
11780
    instance = self.instance
11781
    pnode = instance.primary_node
11782
    snode = self.op.remote_node
11783

    
11784
    assert instance.disk_template == constants.DT_PLAIN
11785

    
11786
    # create a fake disk info for _GenerateDiskTemplate
11787
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11788
                  constants.IDISK_VG: d.logical_id[0]}
11789
                 for d in instance.disks]
11790
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11791
                                      instance.name, pnode, [snode],
11792
                                      disk_info, None, None, 0, feedback_fn,
11793
                                      self.diskparams)
11794
    info = _GetInstanceInfoText(instance)
11795
    feedback_fn("Creating aditional volumes...")
11796
    # first, create the missing data and meta devices
11797
    for disk in new_disks:
11798
      # unfortunately this is... not too nice
11799
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11800
                            info, True)
11801
      for child in disk.children:
11802
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11803
    # at this stage, all new LVs have been created, we can rename the
11804
    # old ones
11805
    feedback_fn("Renaming original volumes...")
11806
    rename_list = [(o, n.children[0].logical_id)
11807
                   for (o, n) in zip(instance.disks, new_disks)]
11808
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11809
    result.Raise("Failed to rename original LVs")
11810

    
11811
    feedback_fn("Initializing DRBD devices...")
11812
    # all child devices are in place, we can now create the DRBD devices
11813
    for disk in new_disks:
11814
      for node in [pnode, snode]:
11815
        f_create = node == pnode
11816
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11817

    
11818
    # at this point, the instance has been modified
11819
    instance.disk_template = constants.DT_DRBD8
11820
    instance.disks = new_disks
11821
    self.cfg.Update(instance, feedback_fn)
11822

    
11823
    # Release node locks while waiting for sync
11824
    _ReleaseLocks(self, locking.LEVEL_NODE)
11825

    
11826
    # disks are created, waiting for sync
11827
    disk_abort = not _WaitForSync(self, instance,
11828
                                  oneshot=not self.op.wait_for_sync)
11829
    if disk_abort:
11830
      raise errors.OpExecError("There are some degraded disks for"
11831
                               " this instance, please cleanup manually")
11832

    
11833
    # Node resource locks will be released by caller
11834

    
11835
  def _ConvertDrbdToPlain(self, feedback_fn):
11836
    """Converts an instance from drbd to plain.
11837

11838
    """
11839
    instance = self.instance
11840

    
11841
    assert len(instance.secondary_nodes) == 1
11842
    assert instance.disk_template == constants.DT_DRBD8
11843

    
11844
    pnode = instance.primary_node
11845
    snode = instance.secondary_nodes[0]
11846
    feedback_fn("Converting template to plain")
11847

    
11848
    old_disks = instance.disks
11849
    new_disks = [d.children[0] for d in old_disks]
11850

    
11851
    # copy over size and mode
11852
    for parent, child in zip(old_disks, new_disks):
11853
      child.size = parent.size
11854
      child.mode = parent.mode
11855

    
11856
    # update instance structure
11857
    instance.disks = new_disks
11858
    instance.disk_template = constants.DT_PLAIN
11859
    self.cfg.Update(instance, feedback_fn)
11860

    
11861
    # Release locks in case removing disks takes a while
11862
    _ReleaseLocks(self, locking.LEVEL_NODE)
11863

    
11864
    feedback_fn("Removing volumes on the secondary node...")
11865
    for disk in old_disks:
11866
      self.cfg.SetDiskID(disk, snode)
11867
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11868
      if msg:
11869
        self.LogWarning("Could not remove block device %s on node %s,"
11870
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11871

    
11872
    feedback_fn("Removing unneeded volumes on the primary node...")
11873
    for idx, disk in enumerate(old_disks):
11874
      meta = disk.children[1]
11875
      self.cfg.SetDiskID(meta, pnode)
11876
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11877
      if msg:
11878
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11879
                        " continuing anyway: %s", idx, pnode, msg)
11880

    
11881
    # this is a DRBD disk, return its port to the pool
11882
    for disk in old_disks:
11883
      tcp_port = disk.logical_id[2]
11884
      self.cfg.AddTcpUdpPort(tcp_port)
11885

    
11886
    # Node resource locks will be released by caller
11887

    
11888
  def Exec(self, feedback_fn):
11889
    """Modifies an instance.
11890

11891
    All parameters take effect only at the next restart of the instance.
11892

11893
    """
11894
    # Process here the warnings from CheckPrereq, as we don't have a
11895
    # feedback_fn there.
11896
    for warn in self.warn:
11897
      feedback_fn("WARNING: %s" % warn)
11898

    
11899
    assert ((self.op.disk_template is None) ^
11900
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11901
      "Not owning any node resource locks"
11902

    
11903
    result = []
11904
    instance = self.instance
11905
    # disk changes
11906
    for disk_op, disk_dict in self.op.disks:
11907
      if disk_op == constants.DDM_REMOVE:
11908
        # remove the last disk
11909
        device = instance.disks.pop()
11910
        device_idx = len(instance.disks)
11911
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11912
          self.cfg.SetDiskID(disk, node)
11913
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11914
          if msg:
11915
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11916
                            " continuing anyway", device_idx, node, msg)
11917
        result.append(("disk/%d" % device_idx, "remove"))
11918

    
11919
        # if this is a DRBD disk, return its port to the pool
11920
        if device.dev_type in constants.LDS_DRBD:
11921
          tcp_port = device.logical_id[2]
11922
          self.cfg.AddTcpUdpPort(tcp_port)
11923
      elif disk_op == constants.DDM_ADD:
11924
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base,
                                         feedback_fn,
                                         self.diskparams)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # online/offline instance
    if self.op.online_inst:
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))
    if self.op.offline_inst:
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }

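
# The sketch below is purely illustrative and is not referenced anywhere in
# this module; it only documents how the _DISK_CONVERSIONS table above acts
# as a dispatch map keyed by (old_template, new_template) pairs.  The
# function name is hypothetical.
def _ExampleDiskConversionDispatch(lu, feedback_fn,
                                   old_template, new_template):
  """Dispatches to a disk template conversion helper (documentation only).

  Assumes C{lu} is an L{LUInstanceSetParams} whose prerequisites have
  already been checked.

  """
  mode = (old_template, new_template)
  try:
    convert_fn = lu._DISK_CONVERSIONS[mode]
  except KeyError:
    raise errors.OpPrereqError("Conversion from %s to %s is not supported" %
                               mode, errors.ECODE_INVAL)
  # The conversion helpers are plain functions taking (lu, feedback_fn)
  convert_fn(lu, feedback_fn)

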
class LUInstanceChangeGroup(LogicalUnit):
  """Moves an instance to another node group.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)

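
# Illustrative sketch only, not used by the LU above: building the opcode
# that LUInstanceChangeGroup processes.  The helper name is hypothetical; the
# opcode fields mirror the self.op attributes referenced in the LU.
def _ExampleChangeGroupOpCode(instance_name, target_groups=None,
                              iallocator=None):
  """Returns an OpInstanceChangeGroup opcode (documentation only).

  The jobs computed by the LU are returned wrapped in L{ResultWithJobs} and
  submitted by the master processor.

  """
  return opcodes.OpInstanceChangeGroup(instance_name=instance_name,
                                       target_groups=target_groups,
                                       iallocator=iallocator)

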
class LUBackupQuery(NoHooksLU):
12212
  """Query the exports list
12213

12214
  """
12215
  REQ_BGL = False
12216

    
12217
  def ExpandNames(self):
12218
    self.needed_locks = {}
12219
    self.share_locks[locking.LEVEL_NODE] = 1
12220
    if not self.op.nodes:
12221
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12222
    else:
12223
      self.needed_locks[locking.LEVEL_NODE] = \
12224
        _GetWantedNodes(self, self.op.nodes)
12225

    
12226
  def Exec(self, feedback_fn):
12227
    """Compute the list of all the exported system images.
12228

12229
    @rtype: dict
12230
    @return: a dictionary with the structure node->(export-list)
12231
        where export-list is a list of the instances exported on
12232
        that node.
12233

12234
    """
12235
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
12236
    rpcresult = self.rpc.call_export_list(self.nodes)
12237
    result = {}
12238
    for node in rpcresult:
12239
      if rpcresult[node].fail_msg:
12240
        result[node] = False
12241
      else:
12242
        result[node] = rpcresult[node].payload
12243

    
12244
    return result
12245

    
12246

    
12247
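
# Illustrative sketch only: the dictionary shape returned by
# LUBackupQuery.Exec above, mapping each queried node either to the list of
# exports it holds or to False if the RPC to that node failed.  The values
# below are made up for illustration.
_EXAMPLE_BACKUP_QUERY_RESULT = {
  "node1.example.com": ["instance1.example.com", "instance2.example.com"],
  "node2.example.com": False,
  }

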
class LUBackupPrepare(NoHooksLU):
12248
  """Prepares an instance for an export and returns useful information.
12249

12250
  """
12251
  REQ_BGL = False
12252

    
12253
  def ExpandNames(self):
12254
    self._ExpandAndLockInstance()
12255

    
12256
  def CheckPrereq(self):
12257
    """Check prerequisites.
12258

12259
    """
12260
    instance_name = self.op.instance_name
12261

    
12262
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12263
    assert self.instance is not None, \
12264
          "Cannot retrieve locked instance %s" % self.op.instance_name
12265
    _CheckNodeOnline(self, self.instance.primary_node)
12266

    
12267
    self._cds = _GetClusterDomainSecret()
12268

    
12269
  def Exec(self, feedback_fn):
12270
    """Prepares an instance for an export.
12271

12272
    """
12273
    instance = self.instance
12274

    
12275
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12276
      salt = utils.GenerateSecret(8)
12277

    
12278
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12279
      result = self.rpc.call_x509_cert_create(instance.primary_node,
12280
                                              constants.RIE_CERT_VALIDITY)
12281
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
12282

    
12283
      (name, cert_pem) = result.payload
12284

    
12285
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12286
                                             cert_pem)
12287

    
12288
      return {
12289
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12290
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12291
                          salt),
12292
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12293
        }
12294

    
12295
    return None
12296

    
12297

    
12298
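
# Illustrative sketch only: LUBackupPrepare above signs the X509 key name
# with the cluster domain secret, and LUBackupExport later verifies that
# signature.  The helper name is hypothetical and merely documents this
# round trip.
def _ExampleX509KeyNameRoundTrip(cds, key_name):
  """Signs and verifies an X509 key name (documentation only).

  @return: whether verification of the freshly created HMAC succeeds

  """
  salt = utils.GenerateSecret(8)
  # As returned by LUBackupPrepare in the "x509_key_name" field
  x509_key_name = (key_name, utils.Sha1Hmac(cds, key_name, salt=salt), salt)
  (name, hmac_digest, hmac_salt) = x509_key_name
  # As checked by LUBackupExport.CheckPrereq for remote exports
  return utils.VerifySha1Hmac(cds, name, hmac_digest, salt=hmac_salt)

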
class LUBackupExport(LogicalUnit):
12299
  """Export an instance to an image in the cluster.
12300

12301
  """
12302
  HPATH = "instance-export"
12303
  HTYPE = constants.HTYPE_INSTANCE
12304
  REQ_BGL = False
12305

    
12306
  def CheckArguments(self):
12307
    """Check the arguments.
12308

12309
    """
12310
    self.x509_key_name = self.op.x509_key_name
12311
    self.dest_x509_ca_pem = self.op.destination_x509_ca
12312

    
12313
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12314
      if not self.x509_key_name:
12315
        raise errors.OpPrereqError("Missing X509 key name for encryption",
12316
                                   errors.ECODE_INVAL)
12317

    
12318
      if not self.dest_x509_ca_pem:
12319
        raise errors.OpPrereqError("Missing destination X509 CA",
12320
                                   errors.ECODE_INVAL)
12321

    
12322
  def ExpandNames(self):
12323
    self._ExpandAndLockInstance()
12324

    
12325
    # Lock all nodes for local exports
12326
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12327
      # FIXME: lock only instance primary and destination node
12328
      #
12329
      # Sad but true, for now we have to lock all nodes, as we don't know where
12330
      # the previous export might be, and in this LU we search for it and
12331
      # remove it from its current node. In the future we could fix this by:
12332
      #  - making a tasklet to search (share-lock all), then create the
12333
      #    new one, then one to remove, after
12334
      #  - removing the removal operation altogether
12335
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12336

    
12337
  def DeclareLocks(self, level):
12338
    """Last minute lock declaration."""
12339
    # All nodes are locked anyway, so nothing to do here.
12340

    
12341
  def BuildHooksEnv(self):
12342
    """Build hooks env.
12343

12344
    This will run on the master, primary node and target node.
12345

12346
    """
12347
    env = {
12348
      "EXPORT_MODE": self.op.mode,
12349
      "EXPORT_NODE": self.op.target_node,
12350
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12351
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12352
      # TODO: Generic function for boolean env variables
12353
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12354
      }
12355

    
12356
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12357

    
12358
    return env
12359

    
12360
  def BuildHooksNodes(self):
12361
    """Build hooks nodes.
12362

12363
    """
12364
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12365

    
12366
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12367
      nl.append(self.op.target_node)
12368

    
12369
    return (nl, nl)
12370

    
12371
  def CheckPrereq(self):
12372
    """Check prerequisites.
12373

12374
    This checks that the instance and node names are valid.
12375

12376
    """
12377
    instance_name = self.op.instance_name
12378

    
12379
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12380
    assert self.instance is not None, \
12381
          "Cannot retrieve locked instance %s" % self.op.instance_name
12382
    _CheckNodeOnline(self, self.instance.primary_node)
12383

    
12384
    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove an instance without shutting"
                                 " it down first", errors.ECODE_STATE)
12389

    
12390
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12391
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12392
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12393
      assert self.dst_node is not None
12394

    
12395
      _CheckNodeOnline(self, self.dst_node.name)
12396
      _CheckNodeNotDrained(self, self.dst_node.name)
12397

    
12398
      self._cds = None
12399
      self.dest_disk_info = None
12400
      self.dest_x509_ca = None
12401

    
12402
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12403
      self.dst_node = None
12404

    
12405
      if len(self.op.target_node) != len(self.instance.disks):
12406
        raise errors.OpPrereqError(("Received destination information for %s"
12407
                                    " disks, but instance %s has %s disks") %
12408
                                   (len(self.op.target_node), instance_name,
12409
                                    len(self.instance.disks)),
12410
                                   errors.ECODE_INVAL)
12411

    
12412
      cds = _GetClusterDomainSecret()
12413

    
12414
      # Check X509 key name
12415
      try:
12416
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12417
      except (TypeError, ValueError), err:
12418
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12419

    
12420
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12421
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12422
                                   errors.ECODE_INVAL)
12423

    
12424
      # Load and verify CA
12425
      try:
12426
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12427
      except OpenSSL.crypto.Error, err:
12428
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12429
                                   (err, ), errors.ECODE_INVAL)
12430

    
12431
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12432
      if errcode is not None:
12433
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12434
                                   (msg, ), errors.ECODE_INVAL)
12435

    
12436
      self.dest_x509_ca = cert
12437

    
12438
      # Verify target information
12439
      disk_info = []
12440
      for idx, disk_data in enumerate(self.op.target_node):
12441
        try:
12442
          (host, port, magic) = \
12443
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12444
        except errors.GenericError, err:
12445
          raise errors.OpPrereqError("Target info for disk %s: %s" %
12446
                                     (idx, err), errors.ECODE_INVAL)
12447

    
12448
        disk_info.append((host, port, magic))
12449

    
12450
      assert len(disk_info) == len(self.op.target_node)
12451
      self.dest_disk_info = disk_info
12452

    
12453
    else:
12454
      raise errors.ProgrammerError("Unhandled export mode %r" %
12455
                                   self.op.mode)
12456

    
12457
    # instance disk type verification
12458
    # TODO: Implement export support for file-based disks
12459
    for disk in self.instance.disks:
12460
      if disk.dev_type == constants.LD_FILE:
12461
        raise errors.OpPrereqError("Export not supported for instances with"
12462
                                   " file-based disks", errors.ECODE_INVAL)
12463

    
12464
  def _CleanupExports(self, feedback_fn):
12465
    """Removes exports of current instance from all other nodes.
12466

12467
    If an instance in a cluster with nodes A..D was exported to node C, its
12468
    exports will be removed from the nodes A, B and D.
12469

12470
    """
12471
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
12472

    
12473
    nodelist = self.cfg.GetNodeList()
12474
    nodelist.remove(self.dst_node.name)
12475

    
12476
    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
12479
    iname = self.instance.name
12480
    if nodelist:
12481
      feedback_fn("Removing old exports for instance %s" % iname)
12482
      exportlist = self.rpc.call_export_list(nodelist)
12483
      for node in exportlist:
12484
        if exportlist[node].fail_msg:
12485
          continue
12486
        if iname in exportlist[node].payload:
12487
          msg = self.rpc.call_export_remove(node, iname).fail_msg
12488
          if msg:
12489
            self.LogWarning("Could not remove older export for instance %s"
12490
                            " on node %s: %s", iname, node, msg)
12491

    
12492
  def Exec(self, feedback_fn):
12493
    """Export an instance to an image in the cluster.
12494

12495
    """
12496
    assert self.op.mode in constants.EXPORT_MODES
12497

    
12498
    instance = self.instance
12499
    src_node = instance.primary_node
12500

    
12501
    if self.op.shutdown:
12502
      # shutdown the instance, but not the disks
12503
      feedback_fn("Shutting down instance %s" % instance.name)
12504
      result = self.rpc.call_instance_shutdown(src_node, instance,
12505
                                               self.op.shutdown_timeout)
12506
      # TODO: Maybe ignore failures if ignore_remove_failures is set
12507
      result.Raise("Could not shutdown instance %s on"
12508
                   " node %s" % (instance.name, src_node))
12509

    
12510
    # set the disks ID correctly since call_instance_start needs the
12511
    # correct drbd minor to create the symlinks
12512
    for disk in instance.disks:
12513
      self.cfg.SetDiskID(disk, src_node)
12514

    
12515
    activate_disks = (instance.admin_state != constants.ADMINST_UP)
12516

    
12517
    if activate_disks:
12518
      # Activate the instance disks if we're exporting a stopped instance
12519
      feedback_fn("Activating disks for %s" % instance.name)
12520
      _StartInstanceDisks(self, instance, None)
12521

    
12522
    try:
12523
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12524
                                                     instance)
12525

    
12526
      helper.CreateSnapshots()
12527
      try:
12528
        if (self.op.shutdown and
12529
            instance.admin_state == constants.ADMINST_UP and
12530
            not self.op.remove_instance):
12531
          assert not activate_disks
12532
          feedback_fn("Starting instance %s" % instance.name)
12533
          result = self.rpc.call_instance_start(src_node,
12534
                                                (instance, None, None), False)
12535
          msg = result.fail_msg
12536
          if msg:
12537
            feedback_fn("Failed to start instance: %s" % msg)
12538
            _ShutdownInstanceDisks(self, instance)
12539
            raise errors.OpExecError("Could not start instance: %s" % msg)
12540

    
12541
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
12542
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12543
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12544
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12545
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12546

    
12547
          (key_name, _, _) = self.x509_key_name
12548

    
12549
          dest_ca_pem = \
12550
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12551
                                            self.dest_x509_ca)
12552

    
12553
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12554
                                                     key_name, dest_ca_pem,
12555
                                                     timeouts)
12556
      finally:
12557
        helper.Cleanup()
12558

    
12559
      # Check for backwards compatibility
12560
      assert len(dresults) == len(instance.disks)
12561
      assert compat.all(isinstance(i, bool) for i in dresults), \
12562
             "Not all results are boolean: %r" % dresults
12563

    
12564
    finally:
12565
      if activate_disks:
12566
        feedback_fn("Deactivating disks for %s" % instance.name)
12567
        _ShutdownInstanceDisks(self, instance)
12568

    
12569
    if not (compat.all(dresults) and fin_resu):
12570
      failures = []
12571
      if not fin_resu:
12572
        failures.append("export finalization")
12573
      if not compat.all(dresults):
12574
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12575
                               if not dsk)
12576
        failures.append("disk export: disk(s) %s" % fdsk)
12577

    
12578
      raise errors.OpExecError("Export failed, errors in %s" %
12579
                               utils.CommaJoin(failures))
12580

    
12581
    # At this point, the export was successful, we can cleanup/finish
12582

    
12583
    # Remove instance if requested
12584
    if self.op.remove_instance:
12585
      feedback_fn("Removing instance %s" % instance.name)
12586
      _RemoveInstance(self, feedback_fn, instance,
12587
                      self.op.ignore_remove_failures)
12588

    
12589
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12590
      self._CleanupExports(feedback_fn)
12591

    
12592
    return fin_resu, dresults
12593

    
12594

    
12595
class LUBackupRemove(NoHooksLU):
12596
  """Remove exports related to the named instance.
12597

12598
  """
12599
  REQ_BGL = False
12600

    
12601
  def ExpandNames(self):
12602
    self.needed_locks = {}
12603
    # We need all nodes to be locked in order for RemoveExport to work, but we
12604
    # don't need to lock the instance itself, as nothing will happen to it (and
12605
    # we can remove exports also for a removed instance)
12606
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12607

    
12608
  def Exec(self, feedback_fn):
12609
    """Remove any export.
12610

12611
    """
12612
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12613
    # If the instance was not found we'll try with the name that was passed in.
12614
    # This will only work if it was an FQDN, though.
12615
    fqdn_warn = False
12616
    if not instance_name:
12617
      fqdn_warn = True
12618
      instance_name = self.op.instance_name
12619

    
12620
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12621
    exportlist = self.rpc.call_export_list(locked_nodes)
12622
    found = False
12623
    for node in exportlist:
12624
      msg = exportlist[node].fail_msg
12625
      if msg:
12626
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12627
        continue
12628
      if instance_name in exportlist[node].payload:
12629
        found = True
12630
        result = self.rpc.call_export_remove(node, instance_name)
12631
        msg = result.fail_msg
12632
        if msg:
12633
          logging.error("Could not remove export for instance %s"
12634
                        " on node %s: %s", instance_name, node, msg)
12635

    
12636
    if fqdn_warn and not found:
12637
      feedback_fn("Export not found. If trying to remove an export belonging"
12638
                  " to a deleted instance please use its Fully Qualified"
12639
                  " Domain Name.")
12640

    
12641

    
12642
class LUGroupAdd(LogicalUnit):
12643
  """Logical unit for creating node groups.
12644

12645
  """
12646
  HPATH = "group-add"
12647
  HTYPE = constants.HTYPE_GROUP
12648
  REQ_BGL = False
12649

    
12650
  def ExpandNames(self):
12651
    # We need the new group's UUID here so that we can create and acquire the
12652
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12653
    # that it should not check whether the UUID exists in the configuration.
12654
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12655
    self.needed_locks = {}
12656
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12657

    
12658
  def CheckPrereq(self):
12659
    """Check prerequisites.
12660

12661
    This checks that the given group name is not an existing node group
12662
    already.
12663

12664
    """
12665
    try:
12666
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12667
    except errors.OpPrereqError:
12668
      pass
12669
    else:
12670
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12671
                                 " node group (UUID: %s)" %
12672
                                 (self.op.group_name, existing_uuid),
12673
                                 errors.ECODE_EXISTS)
12674

    
12675
    if self.op.ndparams:
12676
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12677

    
12678
    if self.op.diskparams:
12679
      for templ in constants.DISK_TEMPLATES:
12680
        if templ not in self.op.diskparams:
12681
          self.op.diskparams[templ] = {}
12682
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12683
    else:
12684
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12685

    
12686
  def BuildHooksEnv(self):
12687
    """Build hooks env.
12688

12689
    """
12690
    return {
12691
      "GROUP_NAME": self.op.group_name,
12692
      }
12693

    
12694
  def BuildHooksNodes(self):
12695
    """Build hooks nodes.
12696

12697
    """
12698
    mn = self.cfg.GetMasterNode()
12699
    return ([mn], [mn])
12700

    
12701
  def Exec(self, feedback_fn):
12702
    """Add the node group to the cluster.
12703

12704
    """
12705
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12706
                                  uuid=self.group_uuid,
12707
                                  alloc_policy=self.op.alloc_policy,
12708
                                  ndparams=self.op.ndparams,
12709
                                  diskparams=self.op.diskparams)
12710

    
12711
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12712
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12713

    
12714

    
12715
class LUGroupAssignNodes(NoHooksLU):
12716
  """Logical unit for assigning nodes to groups.
12717

12718
  """
12719
  REQ_BGL = False
12720

    
12721
  def ExpandNames(self):
12722
    # These raise errors.OpPrereqError on their own:
12723
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12724
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12725

    
12726
    # We want to lock all the affected nodes and groups. We have readily
12727
    # available the list of nodes, and the *destination* group. To gather the
12728
    # list of "source" groups, we need to fetch node information later on.
12729
    self.needed_locks = {
12730
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12731
      locking.LEVEL_NODE: self.op.nodes,
12732
      }
12733

    
12734
  def DeclareLocks(self, level):
12735
    if level == locking.LEVEL_NODEGROUP:
12736
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12737

    
12738
      # Try to get all affected nodes' groups without having the group or node
12739
      # lock yet. Needs verification later in the code flow.
12740
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12741

    
12742
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12743

    
12744
  def CheckPrereq(self):
12745
    """Check prerequisites.
12746

12747
    """
12748
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12749
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12750
            frozenset(self.op.nodes))
12751

    
12752
    expected_locks = (set([self.group_uuid]) |
12753
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12754
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12755
    if actual_locks != expected_locks:
12756
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12757
                               " current groups are '%s', used to be '%s'" %
12758
                               (utils.CommaJoin(expected_locks),
12759
                                utils.CommaJoin(actual_locks)))
12760

    
12761
    self.node_data = self.cfg.GetAllNodesInfo()
12762
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12763
    instance_data = self.cfg.GetAllInstancesInfo()
12764

    
12765
    if self.group is None:
12766
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12767
                               (self.op.group_name, self.group_uuid))
12768

    
12769
    (new_splits, previous_splits) = \
12770
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12771
                                             for node in self.op.nodes],
12772
                                            self.node_data, instance_data)
12773

    
12774
    if new_splits:
12775
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12776

    
12777
      if not self.op.force:
12778
        raise errors.OpExecError("The following instances get split by this"
12779
                                 " change and --force was not given: %s" %
12780
                                 fmt_new_splits)
12781
      else:
12782
        self.LogWarning("This operation will split the following instances: %s",
12783
                        fmt_new_splits)
12784

    
12785
        if previous_splits:
12786
          self.LogWarning("In addition, these already-split instances continue"
12787
                          " to be split across groups: %s",
12788
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12789

    
12790
  def Exec(self, feedback_fn):
12791
    """Assign nodes to a new group.
12792

12793
    """
12794
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12795

    
12796
    self.cfg.AssignGroupNodes(mods)
12797

    
12798
  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    about instances that were already split and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously whole and become split
      as a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))

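
# Illustrative example, not used anywhere in this module: exercising
# CheckAssignmentForSplitInstances above with minimal stand-in objects.  The
# _FakeNode/_FakeInstance classes are assumptions made purely for this sketch.
def _ExampleSplitInstanceCheck():
  """Shows how moving a node between groups is reported as a split.

  @return: the (new_splits, previous_splits) pair for a toy configuration

  """
  class _FakeNode:
    def __init__(self, group):
      self.group = group

  class _FakeInstance:
    def __init__(self, name, primary, secondaries):
      self.name = name
      self.primary_node = primary
      self.secondary_nodes = secondaries
      self.disk_template = constants.DT_DRBD8

  node_data = {
    "node1": _FakeNode("group-A"),
    "node2": _FakeNode("group-A"),
    }
  instance_data = {
    "inst1": _FakeInstance("inst1", "node1", ["node2"]),
    }
  # Moving node2 into group-B splits inst1 across two groups
  changes = [("node2", "group-B")]
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(changes,
                                                             node_data,
                                                             instance_data)

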
class _GroupQuery(_QueryBase):
12849
  FIELDS = query.GROUP_FIELDS
12850

    
12851
  def ExpandNames(self, lu):
12852
    lu.needed_locks = {}
12853

    
12854
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12855
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12856

    
12857
    if not self.names:
12858
      self.wanted = [name_to_uuid[name]
12859
                     for name in utils.NiceSort(name_to_uuid.keys())]
12860
    else:
12861
      # Accept names to be either names or UUIDs.
12862
      missing = []
12863
      self.wanted = []
12864
      all_uuid = frozenset(self._all_groups.keys())
12865

    
12866
      for name in self.names:
12867
        if name in all_uuid:
12868
          self.wanted.append(name)
12869
        elif name in name_to_uuid:
12870
          self.wanted.append(name_to_uuid[name])
12871
        else:
12872
          missing.append(name)
12873

    
12874
      if missing:
12875
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12876
                                   utils.CommaJoin(missing),
12877
                                   errors.ECODE_NOENT)
12878

    
12879
  def DeclareLocks(self, lu, level):
12880
    pass
12881

    
12882
  def _GetQueryData(self, lu):
12883
    """Computes the list of node groups and their attributes.
12884

12885
    """
12886
    do_nodes = query.GQ_NODE in self.requested_data
12887
    do_instances = query.GQ_INST in self.requested_data
12888

    
12889
    group_to_nodes = None
12890
    group_to_instances = None
12891

    
12892
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12893
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12894
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12895
    # instance->node. Hence, we will need to process nodes even if we only need
12896
    # instance information.
12897
    if do_nodes or do_instances:
12898
      all_nodes = lu.cfg.GetAllNodesInfo()
12899
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12900
      node_to_group = {}
12901

    
12902
      for node in all_nodes.values():
12903
        if node.group in group_to_nodes:
12904
          group_to_nodes[node.group].append(node.name)
12905
          node_to_group[node.name] = node.group
12906

    
12907
      if do_instances:
12908
        all_instances = lu.cfg.GetAllInstancesInfo()
12909
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12910

    
12911
        for instance in all_instances.values():
12912
          node = instance.primary_node
12913
          if node in node_to_group:
12914
            group_to_instances[node_to_group[node]].append(instance.name)
12915

    
12916
        if not do_nodes:
12917
          # Do not pass on node information if it was not requested.
12918
          group_to_nodes = None
12919

    
12920
    return query.GroupQueryData([self._all_groups[uuid]
12921
                                 for uuid in self.wanted],
12922
                                group_to_nodes, group_to_instances)
12923

    
12924

    
12925
class LUGroupQuery(NoHooksLU):
12926
  """Logical unit for querying node groups.
12927

12928
  """
12929
  REQ_BGL = False
12930

    
12931
  def CheckArguments(self):
12932
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12933
                          self.op.output_fields, False)
12934

    
12935
  def ExpandNames(self):
12936
    self.gq.ExpandNames(self)
12937

    
12938
  def DeclareLocks(self, level):
12939
    self.gq.DeclareLocks(self, level)
12940

    
12941
  def Exec(self, feedback_fn):
12942
    return self.gq.OldStyleQuery(self)
12943

    
12944

    
12945
class LUGroupSetParams(LogicalUnit):
12946
  """Modifies the parameters of a node group.
12947

12948
  """
12949
  HPATH = "group-modify"
12950
  HTYPE = constants.HTYPE_GROUP
12951
  REQ_BGL = False
12952

    
12953
  def CheckArguments(self):
12954
    all_changes = [
12955
      self.op.ndparams,
12956
      self.op.diskparams,
12957
      self.op.alloc_policy,
12958
      self.op.hv_state,
12959
      self.op.disk_state
12960
      ]
12961

    
12962
    if all_changes.count(None) == len(all_changes):
12963
      raise errors.OpPrereqError("Please pass at least one modification",
12964
                                 errors.ECODE_INVAL)
12965

    
12966
  def ExpandNames(self):
12967
    # This raises errors.OpPrereqError on its own:
12968
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12969

    
12970
    self.needed_locks = {
12971
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12972
      }
12973

    
12974
  def CheckPrereq(self):
12975
    """Check prerequisites.
12976

12977
    """
12978
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12979

    
12980
    if self.group is None:
12981
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12982
                               (self.op.group_name, self.group_uuid))
12983

    
12984
    if self.op.ndparams:
12985
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12986
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12987
      self.new_ndparams = new_ndparams
12988

    
12989
    if self.op.diskparams:
12990
      self.new_diskparams = dict()
12991
      for templ in constants.DISK_TEMPLATES:
12992
        if templ not in self.op.diskparams:
12993
          self.op.diskparams[templ] = {}
12994
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
12995
                                             self.op.diskparams[templ])
12996
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
12997
        self.new_diskparams[templ] = new_templ_params
12998

    
12999
    if self.op.hv_state:
13000
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13001
                                                 self.group.hv_state_static)
13002

    
13003
    if self.op.disk_state:
13004
      self.new_disk_state = \
13005
        _MergeAndVerifyDiskState(self.op.disk_state,
13006
                                 self.group.disk_state_static)
13007

    
13008
  def BuildHooksEnv(self):
13009
    """Build hooks env.
13010

13011
    """
13012
    return {
13013
      "GROUP_NAME": self.op.group_name,
13014
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
13015
      }
13016

    
13017
  def BuildHooksNodes(self):
13018
    """Build hooks nodes.
13019

13020
    """
13021
    mn = self.cfg.GetMasterNode()
13022
    return ([mn], [mn])
13023

    
13024
  def Exec(self, feedback_fn):
13025
    """Modifies the node group.
13026

13027
    """
13028
    result = []
13029

    
13030
    if self.op.ndparams:
13031
      self.group.ndparams = self.new_ndparams
13032
      result.append(("ndparams", str(self.group.ndparams)))
13033

    
13034
    if self.op.diskparams:
13035
      self.group.diskparams = self.new_diskparams
13036
      result.append(("diskparams", str(self.group.diskparams)))
13037

    
13038
    if self.op.alloc_policy:
13039
      self.group.alloc_policy = self.op.alloc_policy
13040

    
13041
    if self.op.hv_state:
13042
      self.group.hv_state_static = self.new_hv_state
13043

    
13044
    if self.op.disk_state:
13045
      self.group.disk_state_static = self.new_disk_state
13046

    
13047
    self.cfg.Update(self.group, feedback_fn)
13048
    return result
13049

    
13050

    
13051
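
# Illustrative sketch only: the per-template merge performed by
# LUGroupSetParams.CheckPrereq above, combining a group's existing disk
# parameters with the ones given in the opcode.  The helper name is
# hypothetical.
def _ExampleMergeGroupDiskParams(group, op_diskparams):
  """Returns the merged {template: parameters} dict (documentation only).

  """
  new_diskparams = {}
  for templ in constants.DISK_TEMPLATES:
    merged = _GetUpdatedParams(group.diskparams.get(templ, {}),
                               op_diskparams.get(templ, {}))
    utils.ForceDictType(merged, constants.DISK_DT_TYPES)
    new_diskparams[templ] = merged
  return new_diskparams

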
class LUGroupRemove(LogicalUnit):
13052
  HPATH = "group-remove"
13053
  HTYPE = constants.HTYPE_GROUP
13054
  REQ_BGL = False
13055

    
13056
  def ExpandNames(self):
13057
    # This raises errors.OpPrereqError on its own:
13058
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13059
    self.needed_locks = {
13060
      locking.LEVEL_NODEGROUP: [self.group_uuid],
13061
      }
13062

    
13063
  def CheckPrereq(self):
13064
    """Check prerequisites.
13065

13066
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
13069

13070
    """
13071
    # Verify that the group is empty.
13072
    group_nodes = [node.name
13073
                   for node in self.cfg.GetAllNodesInfo().values()
13074
                   if node.group == self.group_uuid]
13075

    
13076
    if group_nodes:
13077
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
13078
                                 " nodes: %s" %
13079
                                 (self.op.group_name,
13080
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
13081
                                 errors.ECODE_STATE)
13082

    
13083
    # Verify the cluster would not be left group-less.
13084
    if len(self.cfg.GetNodeGroupList()) == 1:
13085
      raise errors.OpPrereqError("Group '%s' is the only group,"
13086
                                 " cannot be removed" %
13087
                                 self.op.group_name,
13088
                                 errors.ECODE_STATE)
13089

    
13090
  def BuildHooksEnv(self):
13091
    """Build hooks env.
13092

13093
    """
13094
    return {
13095
      "GROUP_NAME": self.op.group_name,
13096
      }
13097

    
13098
  def BuildHooksNodes(self):
13099
    """Build hooks nodes.
13100

13101
    """
13102
    mn = self.cfg.GetMasterNode()
13103
    return ([mn], [mn])
13104

    
13105
  def Exec(self, feedback_fn):
13106
    """Remove the node group.
13107

13108
    """
13109
    try:
13110
      self.cfg.RemoveNodeGroup(self.group_uuid)
13111
    except errors.ConfigurationError:
13112
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13113
                               (self.op.group_name, self.group_uuid))
13114

    
13115
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13116

    
13117

    
13118
class LUGroupRename(LogicalUnit):
13119
  HPATH = "group-rename"
13120
  HTYPE = constants.HTYPE_GROUP
13121
  REQ_BGL = False
13122

    
13123
  def ExpandNames(self):
13124
    # This raises errors.OpPrereqError on its own:
13125
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13126

    
13127
    self.needed_locks = {
13128
      locking.LEVEL_NODEGROUP: [self.group_uuid],
13129
      }
13130

    
13131
  def CheckPrereq(self):
13132
    """Check prerequisites.
13133

13134
    Ensures requested new name is not yet used.
13135

13136
    """
13137
    try:
13138
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13139
    except errors.OpPrereqError:
13140
      pass
13141
    else:
13142
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13143
                                 " node group (UUID: %s)" %
13144
                                 (self.op.new_name, new_name_uuid),
13145
                                 errors.ECODE_EXISTS)
13146

    
13147
  def BuildHooksEnv(self):
13148
    """Build hooks env.
13149

13150
    """
13151
    return {
13152
      "OLD_NAME": self.op.group_name,
13153
      "NEW_NAME": self.op.new_name,
13154
      }
13155

    
13156
  def BuildHooksNodes(self):
13157
    """Build hooks nodes.
13158

13159
    """
13160
    mn = self.cfg.GetMasterNode()
13161

    
13162
    all_nodes = self.cfg.GetAllNodesInfo()
13163
    all_nodes.pop(mn, None)
13164

    
13165
    run_nodes = [mn]
13166
    run_nodes.extend(node.name for node in all_nodes.values()
13167
                     if node.group == self.group_uuid)
13168

    
13169
    return (run_nodes, run_nodes)
13170

    
13171
  def Exec(self, feedback_fn):
13172
    """Rename the node group.
13173

13174
    """
13175
    group = self.cfg.GetNodeGroup(self.group_uuid)
13176

    
13177
    if group is None:
13178
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13179
                               (self.op.group_name, self.group_uuid))
13180

    
13181
    group.name = self.op.new_name
13182
    self.cfg.Update(group, feedback_fn)
13183

    
13184
    return self.op.new_name
13185

    
13186

    
13187
class LUGroupEvacuate(LogicalUnit):
13188
  HPATH = "group-evacuate"
13189
  HTYPE = constants.HTYPE_GROUP
13190
  REQ_BGL = False
13191

    
13192
  def ExpandNames(self):
13193
    # This raises errors.OpPrereqError on its own:
13194
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13195

    
13196
    if self.op.target_groups:
13197
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13198
                                  self.op.target_groups)
13199
    else:
13200
      self.req_target_uuids = []
13201

    
13202
    if self.group_uuid in self.req_target_uuids:
13203
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13204
                                 " as a target group (targets are %s)" %
13205
                                 (self.group_uuid,
13206
                                  utils.CommaJoin(self.req_target_uuids)),
13207
                                 errors.ECODE_INVAL)
13208

    
13209
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13210

    
13211
    self.share_locks = _ShareAll()
13212
    self.needed_locks = {
13213
      locking.LEVEL_INSTANCE: [],
13214
      locking.LEVEL_NODEGROUP: [],
13215
      locking.LEVEL_NODE: [],
13216
      }
13217

    
13218
  def DeclareLocks(self, level):
13219
    if level == locking.LEVEL_INSTANCE:
13220
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
13221

    
13222
      # Lock instances optimistically, needs verification once node and group
13223
      # locks have been acquired
13224
      self.needed_locks[locking.LEVEL_INSTANCE] = \
13225
        self.cfg.GetNodeGroupInstances(self.group_uuid)
13226

    
13227
    elif level == locking.LEVEL_NODEGROUP:
13228
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13229

    
13230
      if self.req_target_uuids:
13231
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
13232

    
13233
        # Lock all groups used by instances optimistically; this requires going
13234
        # via the node before it's locked, requiring verification later on
13235
        lock_groups.update(group_uuid
13236
                           for instance_name in
13237
                             self.owned_locks(locking.LEVEL_INSTANCE)
13238
                           for group_uuid in
13239
                             self.cfg.GetInstanceNodeGroups(instance_name))
13240
      else:
13241
        # No target groups, need to lock all of them
13242
        lock_groups = locking.ALL_SET
13243

    
13244
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13245

    
13246
    elif level == locking.LEVEL_NODE:
13247
      # This will only lock the nodes in the group to be evacuated which
13248
      # contain actual instances
13249
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13250
      self._LockInstancesNodes()
13251

    
13252
      # Lock all nodes in group to be evacuated and target groups
13253
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13254
      assert self.group_uuid in owned_groups
13255
      member_nodes = [node_name
13256
                      for group in owned_groups
13257
                      for node_name in self.cfg.GetNodeGroup(group).members]
13258
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13259

    
13260
  def CheckPrereq(self):
13261
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13262
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13263
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13264

    
13265
    assert owned_groups.issuperset(self.req_target_uuids)
13266
    assert self.group_uuid in owned_groups
13267

    
13268
    # Check if locked instances are still correct
13269
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13270

    
13271
    # Get instance information
13272
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13273

    
13274
    # Check if node groups for locked instances are still correct
13275
    for instance_name in owned_instances:
13276
      inst = self.instances[instance_name]
13277
      assert owned_nodes.issuperset(inst.all_nodes), \
13278
        "Instance %s's nodes changed while we kept the lock" % instance_name
13279

    
13280
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13281
                                             owned_groups)
13282

    
13283
      assert self.group_uuid in inst_groups, \
13284
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13285

    
13286
    if self.req_target_uuids:
13287
      # User requested specific target groups
13288
      self.target_uuids = self.req_target_uuids
13289
    else:
13290
      # All groups except the one to be evacuated are potential targets
13291
      self.target_uuids = [group_uuid for group_uuid in owned_groups
13292
                           if group_uuid != self.group_uuid]
13293

    
13294
      if not self.target_uuids:
13295
        raise errors.OpPrereqError("There are no possible target groups",
13296
                                   errors.ECODE_INVAL)
13297

    
13298
  def BuildHooksEnv(self):
13299
    """Build hooks env.
13300

13301
    """
13302
    return {
13303
      "GROUP_NAME": self.op.group_name,
13304
      "TARGET_GROUPS": " ".join(self.target_uuids),
13305
      }
13306

    
13307
  def BuildHooksNodes(self):
13308
    """Build hooks nodes.
13309

13310
    """
13311
    mn = self.cfg.GetMasterNode()
13312

    
13313
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13314

    
13315
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13316

    
13317
    return (run_nodes, run_nodes)
13318

    
13319
  def Exec(self, feedback_fn):
13320
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13321

    
13322
    assert self.group_uuid not in self.target_uuids
13323

    
13324
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13325
                     instances=instances, target_groups=self.target_uuids)
13326

    
13327
    ial.Run(self.op.iallocator)
13328

    
13329
    if not ial.success:
13330
      raise errors.OpPrereqError("Can't compute group evacuation using"
13331
                                 " iallocator '%s': %s" %
13332
                                 (self.op.iallocator, ial.info),
13333
                                 errors.ECODE_NORES)
13334

    
13335
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
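    # Illustrative result format only (made-up names/tags): a list of
    # (path, tag) pairs, e.g.
    #   [("/instances/inst1.example.com", "staging"),
    #    ("/nodes/node2.example.com", "staging")]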
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

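  # Sketch of the client side of this handshake (illustrative only, not part
  # of this module): the test client is expected to connect to the socket
  # path it received, wait until the notification has been processed, and
  # then confirm by sending a single byte before closing, e.g.:
  #   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   s.connect(sockname)
  #   s.send("x")
  #   s.close()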
  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all input parameters required by the selected mode,
      as listed in _MODE_DATA)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

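  # Typical usage (illustrative; mirrors the group evacuation Exec() earlier
  # in this module):
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=instances, target_groups=target_uuids)
  #   ial.Run(op.iallocator)
  #   if not ial.success:
  #     # report the failure, using ial.info for the error details
  #     ...
  #   # otherwise consume ial.result (format depends on the mode)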
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

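    # Illustrative shape of the mapping, keyed by group UUID (made-up values):
    #   {"<group-uuid>": {"name": "group1",
    #                     "alloc_policy": <one of the allocation policies>}}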
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node names to their config-derived attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

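    # Illustrative, abridged per-instance record (made-up values):
    #   {"inst1.example.com": {"memory": 512, "vcpus": 1, "os": "...",
    #                          "nodes": [...], "nics": [...], "disks": [...],
    #                          "disk_template": "...", "hypervisor": "...",
    #                          "admin_state": ..., "tags": [],
    #                          "disk_space_total": ...}}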
    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

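  # Rough shape of the serialized input handed to the iallocator script
  # (illustrative and abridged; see the helpers above for the exact keys):
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": ..., "cluster_tags": [...],
  #    "enabled_hypervisors": [...],
  #    "nodegroups": {...}, "nodes": {...}, "instances": {...},
  #    "request": {"type": <mode>, ...mode-specific keys...}}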
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

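  # How _MODE_DATA is used (summary): for each mode it provides the bound
  # method that builds the mode-specific "request" dict, the list of
  # (input name, validator) pairs required as keyword arguments by
  # __init__/_BuildInputData, and the validator applied to the script's
  # "result" field in _ValidateResult (for node-evacuate and change-group
  # this is the (moved, failed, jobs) triple checked by _NEVAC_RESULT).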
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

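  # Minimal sketch of a well-formed iallocator reply as expected by
  # _ValidateResult below (illustrative values, serialized form):
  #   {"success": true, "info": "allocation successful",
  #    "result": <mode-specific payload>}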
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
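    # Illustrative example (made-up data):
    #   _NodesToGroups({"node1": "uuid-a"},
    #                  {"uuid-a": {"name": "default"}},
    #                  ["node1", "unknown-node"]) == ["default"]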
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
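# Example (illustrative): _GetQueryImplementation(constants.QR_NODE) returns
# the _NodeQuery class registered in _QUERY_IMPL above; unknown resource
# names raise OpPrereqError with ECODE_INVAL.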