#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


# End types
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


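# Illustrative sketch (added for clarity, not part of the original module):
# a minimal LU subclass wired together as the LogicalUnit docstring above
# describes. The opcode field and LU name used here are hypothetical.
#
#   class LUNodePing(NoHooksLU):
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#       self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}
#
#     def CheckPrereq(self):
#       _CheckNodeOnline(self, self.op.node_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Pinging %s" % self.op.node_name)
#       return self.op.node_name
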
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


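# Illustrative sketch (added for clarity, not part of the original module):
# how an LU can delegate its work to tasklets instead of implementing
# CheckPrereq/Exec itself. The LU and tasklet class names are hypothetical.
#
#   class LUInstanceDoSomething(NoHooksLU):
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       # mcpu then runs CheckPrereq() and Exec() of each tasklet, in order,
#       # instead of the LU-level implementations
#       self.tasklets = [_DoSomethingTasklet(self, self.op.instance_name)]
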
class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, names, fields, use_locking):
    """Initializes this class.

    """
    self.names = names
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields)
    self.requested_data = self.query.RequestedData()

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  @classmethod
  def FieldsQuery(cls, fields):
    """Returns list of available fields.

    @return: List of L{objects.QueryFieldDefinition}

    """
    return query.QueryFields(cls.FIELDS, fields)

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu))

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu))


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


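# Worked example (added for clarity, not in the original source); the
# parameter names and values are made up:
#
#   _GetUpdatedParams({"vcpus": 2, "memory": 512},
#                     {"vcpus": 4, "memory": constants.VALUE_DEFAULT})
#   -> {"vcpus": 4}        # "memory" is dropped and reverts to the default
#
# With use_none=True, an explicit None value removes a key in the same way.
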
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


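# Example excerpt of the environment produced above for a hypothetical
# single-NIC, single-disk instance (illustrative only; names and values are
# made up). The hooks runner later adds its own prefix to each key:
#
#   INSTANCE_NAME=web1.example.com     INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_STATUS=up                 INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged         INSTANCE_NIC0_BRIDGE=xen-br0
#   INSTANCE_DISK_COUNT=1              INSTANCE_DISK0_SIZE=10240
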
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


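# The four input combinations handled by _CheckIAllocatorOrNode, summarised
# (added for clarity, not in the original source):
#
#   iallocator set, node set     -> OpPrereqError (mutually exclusive)
#   iallocator set, node unset   -> use the given iallocator
#   iallocator unset, node set   -> use the given node
#   neither set                  -> fall back to the cluster-wide default
#                                   iallocator, or fail if none is defined
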
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

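  # Example of the two output forms produced by _Error above (added for
  # clarity, not in the original source; node name and message are made up).
  # With the opcode's error_codes parameter enabled the line is
  # machine-parseable:
  #   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  # and without it a human-oriented form is used:
  #   - ERROR: node node1.example.com: unable to check volume groups
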
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
1519
                      diskstatus):
1520
    """Verify an instance.
1521

1522
    This function checks to see if the required block devices are
1523
    available on the instance's node.
1524

1525
    """
1526
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1527
    node_current = instanceconfig.primary_node
1528

    
1529
    node_vol_should = {}
1530
    instanceconfig.MapLVsByNode(node_vol_should)
1531

    
1532
    for node in node_vol_should:
1533
      n_img = node_image[node]
1534
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1535
        # ignore missing volumes on offline or broken nodes
1536
        continue
1537
      for volume in node_vol_should[node]:
1538
        test = volume not in n_img.volumes
1539
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1540
                 "volume %s missing on node %s", volume, node)
1541

    
1542
    if instanceconfig.admin_up:
1543
      pri_img = node_image[node_current]
1544
      test = instance not in pri_img.instances and not pri_img.offline
1545
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1546
               "instance not running on its primary node %s",
1547
               node_current)
1548

    
1549
    for node, n_img in node_image.items():
1550
      if (not node == node_current):
1551
        test = instance in n_img.instances
1552
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1553
                 "instance should not run on node %s", node)
1554

    
1555
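    # Flatten the per-node disk status into (node, success, status, idx)
    # tuples so that each disk can be checked individually below.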
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
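      # n_img.sbp maps a primary node name to the instances that use this
      # node as secondary, i.e. the instances that would need to fail over
      # here if that primary node died.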
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

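    # Policy: ordinary files must be present with a matching checksum on
    # every node; files listed in master_files are only required on master
    # candidates and must not be present on other nodes at all.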
    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

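    # drbd_map[node] maps each DRBD minor reserved in the configuration to
    # the owning instance name, e.g. {0: "inst1", 1: "inst2"} (names are
    # illustrative).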
    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

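    # each NV_OSLIST entry is a 7-element list:
    # [name, path, status, diagnose, variants, parameters, api_versions]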
    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

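    # Sanity check of the collected structure; for a two-disk DRBD instance
    # the result looks like (names are illustrative):
    #   {"inst1": {"nodeA": [(True, st0), (True, st1)],
    #              "nodeB": [(True, st0), (True, st1)]}}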
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
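    # node_verify_param tells each node which checks to run; keys are NV_*
    # constants, values are the check-specific arguments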
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    # Gather OOB paths
    oob_paths = []
    for node in nodeinfo:
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
2237
    for instance in instancelist:
2238
      if verbose:
2239
        feedback_fn("* Verifying instance %s" % instance)
2240
      inst_config = instanceinfo[instance]
2241
      self._VerifyInstance(instance, inst_config, node_image,
2242
                           instdisk[instance])
2243
      inst_nodes_offline = []
2244

    
2245
      pnode = inst_config.primary_node
2246
      pnode_img = node_image[pnode]
2247
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2248
               self.ENODERPC, pnode, "instance %s, connection to"
2249
               " primary node failed", instance)
2250

    
2251
      if pnode_img.offline:
2252
        inst_nodes_offline.append(pnode)
2253

    
2254
      # If the instance is non-redundant we cannot survive losing its primary
2255
      # node, so we are not N+1 compliant. On the other hand we have no disk
2256
      # templates with more than one secondary so that situation is not well
2257
      # supported either.
2258
      # FIXME: does not support file-backed instances
2259
      if not inst_config.secondary_nodes:
2260
        i_non_redundant.append(instance)
2261

    
2262
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2263
               instance, "instance has multiple secondary nodes: %s",
2264
               utils.CommaJoin(inst_config.secondary_nodes),
2265
               code=self.ETYPE_WARNING)
2266

    
2267
      if inst_config.disk_template in constants.DTS_NET_MIRROR:
2268
        pnode = inst_config.primary_node
2269
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2270
        instance_groups = {}
2271

    
2272
        for node in instance_nodes:
2273
          instance_groups.setdefault(nodeinfo_byname[node].group,
2274
                                     []).append(node)
2275

    
2276
        pretty_list = [
2277
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2278
          # Sort so that we always list the primary node first.
2279
          for group, nodes in sorted(instance_groups.items(),
2280
                                     key=lambda (_, nodes): pnode in nodes,
2281
                                     reverse=True)]
2282

    
2283
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2284
                      instance, "instance has primary and secondary nodes in"
2285
                      " different groups: %s", utils.CommaJoin(pretty_list),
2286
                      code=self.ETYPE_WARNING)
2287

    
2288
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2289
        i_non_a_balanced.append(instance)
2290

    
2291
      for snode in inst_config.secondary_nodes:
2292
        s_img = node_image[snode]
2293
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2294
                 "instance %s, connection to secondary node failed", instance)
2295

    
2296
        if s_img.offline:
2297
          inst_nodes_offline.append(snode)
2298

    
2299
      # warn that the instance lives on offline nodes
2300
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2301
               "instance lives on offline node(s) %s",
2302
               utils.CommaJoin(inst_nodes_offline))
2303
      # ... or ghost/non-vm_capable nodes
2304
      for node in inst_config.all_nodes:
2305
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2306
                 "instance lives on ghost node %s", node)
2307
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2308
                 instance, "instance lives on non-vm_capable node %s", node)
2309

    
2310
    feedback_fn("* Verifying orphan volumes")
2311
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2312
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2313

    
2314
    feedback_fn("* Verifying orphan instances")
2315
    self._VerifyOrphanInstances(instancelist, node_image)
2316

    
2317
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2318
      feedback_fn("* Verifying N+1 Memory redundancy")
2319
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2320

    
2321
    feedback_fn("* Other Notes")
2322
    if i_non_redundant:
2323
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2324
                  % len(i_non_redundant))
2325

    
2326
    if i_non_a_balanced:
2327
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2328
                  % len(i_non_a_balanced))
2329

    
2330
    if n_offline:
2331
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2332

    
2333
    if n_drained:
2334
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2335

    
2336
    return not self.bad
2337

    
2338
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2339
    """Analyze the post-hooks' result
2340

2341
    This method analyses the hook result, handles it, and sends some
2342
    nicely-formatted feedback back to the user.
2343

2344
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2345
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2346
    @param hooks_results: the results of the multi-node hooks rpc call
2347
    @param feedback_fn: function used send feedback back to the caller
2348
    @param lu_result: previous Exec result
2349
    @return: the new Exec result, based on the previous result
2350
        and hook results
2351

2352
    """
2353
    # We only really run POST phase hooks, and are only interested in
2354
    # their results
2355
    if phase == constants.HOOKS_PHASE_POST:
2356
      # Used to change hooks' output to proper indentation
2357
      feedback_fn("* Hooks Results")
2358
      assert hooks_results, "invalid result from hooks"
2359

    
2360
      for node_name in hooks_results:
2361
        res = hooks_results[node_name]
2362
        msg = res.fail_msg
2363
        test = msg and not res.offline
2364
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2365
                      "Communication failure in hooks execution: %s", msg)
2366
        if res.offline or msg:
2367
          # No need to investigate payload if node is offline or gave an error.
2368
          # override manually lu_result here as _ErrorIf only
2369
          # overrides self.bad
2370
          lu_result = 1
2371
          continue
2372
        for script, hkr, output in res.payload:
2373
          test = hkr == constants.HKR_FAIL
2374
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2375
                        "Script %s failed, output:", script)
2376
          if test:
2377
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2378
            feedback_fn("%s" % output)
2379
            lu_result = 0
2380

    
2381
      return lu_result
2382

    
2383

    
2384
class LUClusterVerifyDisks(NoHooksLU):
2385
  """Verifies the cluster disks status.
2386

2387
  """
2388
  REQ_BGL = False
2389

    
2390
  def ExpandNames(self):
2391
    self.needed_locks = {
2392
      locking.LEVEL_NODE: locking.ALL_SET,
2393
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2394
    }
2395
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2396

    
2397
  def Exec(self, feedback_fn):
2398
    """Verify integrity of cluster disks.
2399

2400
    @rtype: tuple of three items
2401
    @return: a tuple of (dict of node-to-node_error, list of instances
2402
        which need activate-disks, dict of instance: (node, volume) for
2403
        missing volumes
2404

2405
    """
2406
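    # result layout (illustrative names):
    #   ({"node1": "rpc error"}, ["inst2"], {"inst3": [("node1", "lv_xyz")]})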
    result = res_nodes, res_instances, res_missing = {}, [], {}

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    vg_names = self.rpc.call_vg_list(nodes)
    for node in nodes:
      vg_names[node].Raise("Cannot get list of VGs")

    for node in nodes:
      # node_volume
      node_res = self.rpc.call_lv_list([node],
                                       vg_names[node].payload.keys())[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
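        # the node reports disk sizes in bytes while the configuration stores
        # MiB, so shift right by 20 bits (2**20) before comparing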
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

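    # the finally block below restarts the master role even if the
    # configuration update or the known_hosts distribution fails, so the
    # cluster is not left without a master IP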
    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given parameters don't conflict and
    whether the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

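    # os_hvp is a nested dict {os_name: {hv_name: {param: value}}}; values
    # from the opcode are merged on top of the current cluster settings below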
    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2916
      # we need to update the pool size here, otherwise the save will fail
2917
      _AdjustCandidatePool(self, [])
2918

    
2919
    if self.op.maintain_node_health is not None:
2920
      self.cluster.maintain_node_health = self.op.maintain_node_health
2921

    
2922
    if self.op.prealloc_wipe_disks is not None:
2923
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2924

    
2925
    if self.op.add_uids is not None:
2926
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2927

    
2928
    if self.op.remove_uids is not None:
2929
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2930

    
2931
    if self.op.uid_pool is not None:
2932
      self.cluster.uid_pool = self.op.uid_pool
2933

    
2934
    if self.op.default_iallocator is not None:
2935
      self.cluster.default_iallocator = self.op.default_iallocator
2936

    
2937
    if self.op.reserved_lvs is not None:
2938
      self.cluster.reserved_lvs = self.op.reserved_lvs
2939

    
2940
    def helper_os(aname, mods, desc):
2941
      desc += " OS list"
2942
      lst = getattr(self.cluster, aname)
2943
      for key, val in mods:
2944
        if key == constants.DDM_ADD:
2945
          if val in lst:
2946
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2947
          else:
2948
            lst.append(val)
2949
        elif key == constants.DDM_REMOVE:
2950
          if val in lst:
2951
            lst.remove(val)
2952
          else:
2953
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2954
        else:
2955
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
2956

    
2957
    if self.op.hidden_os:
2958
      helper_os("hidden_os", self.op.hidden_os, "hidden")
2959

    
2960
    if self.op.blacklisted_os:
2961
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2962

    
2963
    if self.op.master_netdev:
2964
      master = self.cfg.GetMasterNode()
2965
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
2966
                  self.cluster.master_netdev)
2967
      result = self.rpc.call_node_stop_master(master, False)
2968
      result.Raise("Could not disable the master ip")
2969
      feedback_fn("Changing master_netdev from %s to %s" %
2970
                  (self.cluster.master_netdev, self.op.master_netdev))
2971
      self.cluster.master_netdev = self.op.master_netdev
2972

    
2973
    self.cfg.Update(self.cluster, feedback_fn)
2974

    
2975
    if self.op.master_netdev:
2976
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
2977
                  self.op.master_netdev)
2978
      result = self.rpc.call_node_start_master(master, False, False)
2979
      if result.fail_msg:
2980
        self.LogWarning("Could not re-enable the master ip on"
2981
                        " the master, please restart manually: %s",
2982
                        result.fail_msg)
2983

    
2984

    
2985
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
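
# Example (sketch): to push a single ancillary file to every online node, a
# caller could do something like
#   _UploadHelper(lu, lu.cfg.GetOnlineNodeList(), constants.ETC_HOSTS)
# where "lu" is any logical unit with an initialized rpc client; failures are
# only reported as warnings, never raised.

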
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)
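
# Typical use, mirroring the calls elsewhere in this module: a plain
#   _RedistributeAncillaryFiles(lu)
# after a configuration change, or
#   _RedistributeAncillaryFiles(lu, additional_nodes=[node], additional_vm=...)
# from LUNodeAdd, where the new node is not yet part of the configuration and
# therefore not returned by GetOnlineNodeList().

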
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
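
# Note: the redistribution happens in two steps -- cfg.Update() writes the
# cluster object and, via ConfigWriter, distributes the configuration and the
# derived ssconf files, while _RedistributeAncillaryFiles() pushes the
# remaining files (certificates, keys, /etc/hosts, known_hosts) listed above.

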
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disks to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # number of retries (~10s total, as we sleep 1s each time)
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
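
# Timing notes, derived from the loop above: RPC failures back off for six
# seconds and abort after ten consecutive failures; while syncing, the poll
# sleeps min(60, estimated_time) seconds between iterations, and a "done but
# degraded" result is re-checked up to ten more times at one-second intervals
# before the final verdict is returned.

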
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
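
# Example of the two checking modes (sketch): callers that only care about the
# local storage of a device pass ldisk=True, e.g.
#   _CheckDiskConsistency(lu, dev, node, on_primary=False, ldisk=True)
# while the default ldisk=False reports the overall (possibly network-wide)
# degradation status via is_degraded.

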
class LUOobCommand(NoHooksLU):
3180
  """Logical unit for OOB handling.
3181

3182
  """
3183
  REQ_BGL = False
3184

    
3185
  def CheckPrereq(self):
3186
    """Check prerequisites.
3187

3188
    This checks:
3189
     - the node exists in the configuration
3190
     - OOB is supported
3191

3192
    Any errors are signaled by raising errors.OpPrereqError.
3193

3194
    """
3195
    self.nodes = []
3196
    for node_name in self.op.node_names:
3197
      node = self.cfg.GetNodeInfo(node_name)
3198

    
3199
      if node is None:
3200
        raise errors.OpPrereqError("Node %s not found" % node_name,
3201
                                   errors.ECODE_NOENT)
3202
      else:
3203
        self.nodes.append(node)
3204

    
3205
      if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3206
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3207
                                    " not marked offline") % node_name,
3208
                                   errors.ECODE_STATE)
3209

    
3210
  def ExpandNames(self):
3211
    """Gather locks we need.
3212

3213
    """
3214
    if self.op.node_names:
3215
      self.op.node_names = [_ExpandNodeName(self.cfg, name)
3216
                            for name in self.op.node_names]
3217
    else:
3218
      self.op.node_names = self.cfg.GetNodeList()
3219

    
3220
    self.needed_locks = {
3221
      locking.LEVEL_NODE: self.op.node_names,
3222
      }
3223

    
3224
  def Exec(self, feedback_fn):
3225
    """Execute OOB and return result if we expect any.
3226

3227
    """
3228
    master_node = self.cfg.GetMasterNode()
3229
    ret = []
3230

    
3231
    for node in self.nodes:
3232
      node_entry = [(constants.RS_NORMAL, node.name)]
3233
      ret.append(node_entry)
3234

    
3235
      oob_program = _SupportsOob(self.cfg, node)
3236

    
3237
      if not oob_program:
3238
        node_entry.append((constants.RS_UNAVAIL, None))
3239
        continue
3240

    
3241
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3242
                   self.op.command, oob_program, node.name)
3243
      result = self.rpc.call_run_oob(master_node, oob_program,
3244
                                     self.op.command, node.name,
3245
                                     self.op.timeout)
3246

    
3247
      if result.fail_msg:
3248
        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3249
                        node.name, result.fail_msg)
3250
        node_entry.append((constants.RS_NODATA, None))
3251
      else:
3252
        try:
3253
          self._CheckPayload(result)
3254
        except errors.OpExecError, err:
3255
          self.LogWarning("The payload returned by '%s' is not valid: %s",
3256
                          node.name, err)
3257
          node_entry.append((constants.RS_NODATA, None))
3258
        else:
3259
          if self.op.command == constants.OOB_HEALTH:
3260
            # For health we should log important events
3261
            for item, status in result.payload:
3262
              if status in [constants.OOB_STATUS_WARNING,
3263
                            constants.OOB_STATUS_CRITICAL]:
3264
                self.LogWarning("On node '%s' item '%s' has status '%s'",
3265
                                node.name, item, status)
3266

    
3267
          if self.op.command == constants.OOB_POWER_ON:
3268
            node.powered = True
3269
          elif self.op.command == constants.OOB_POWER_OFF:
3270
            node.powered = False
3271
          elif self.op.command == constants.OOB_POWER_STATUS:
3272
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3273
            if powered != node.powered:
3274
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3275
                               " match actual power state (%s)"), node.powered,
3276
                              node.name, powered)
3277

    
3278
          # For configuration changing commands we should update the node
3279
          if self.op.command in (constants.OOB_POWER_ON,
3280
                                 constants.OOB_POWER_OFF):
3281
            self.cfg.Update(node, feedback_fn)
3282

    
3283
          node_entry.append((constants.RS_NORMAL, result.payload))
3284

    
3285
    return ret
3286

    
3287
  def _CheckPayload(self, result):
3288
    """Checks if the payload is valid.
3289

3290
    @param result: RPC result
3291
    @raises errors.OpExecError: If payload is not valid
3292

3293
    """
3294
    errs = []
3295
    if self.op.command == constants.OOB_HEALTH:
3296
      if not isinstance(result.payload, list):
3297
        errs.append("command 'health' is expected to return a list but got %s" %
3298
                    type(result.payload))
3299
      else:
3300
        for item, status in result.payload:
3301
          if status not in constants.OOB_STATUSES:
3302
            errs.append("health item '%s' has invalid status '%s'" %
3303
                        (item, status))
3304

    
3305
    if self.op.command == constants.OOB_POWER_STATUS:
3306
      if not isinstance(result.payload, dict):
3307
        errs.append("power-status is expected to return a dict but got %s" %
3308
                    type(result.payload))
3309

    
3310
    if self.op.command in [
3311
        constants.OOB_POWER_ON,
3312
        constants.OOB_POWER_OFF,
3313
        constants.OOB_POWER_CYCLE,
3314
        ]:
3315
      if result.payload is not None:
3316
        errs.append("%s is expected to not return payload but got '%s'" %
3317
                    (self.op.command, result.payload))
3318

    
3319
    if errs:
3320
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3321
                               utils.CommaJoin(errs))
3322

    
3323

    
3324

    
3325
class LUOsDiagnose(NoHooksLU):
3326
  """Logical unit for OS diagnose/query.
3327

3328
  """
3329
  REQ_BGL = False
3330
  _HID = "hidden"
3331
  _BLK = "blacklisted"
3332
  _VLD = "valid"
3333
  _FIELDS_STATIC = utils.FieldSet()
3334
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3335
                                   "parameters", "api_versions", _HID, _BLK)
3336

    
3337
  def CheckArguments(self):
3338
    if self.op.names:
3339
      raise errors.OpPrereqError("Selective OS query not supported",
3340
                                 errors.ECODE_INVAL)
3341

    
3342
    _CheckOutputFields(static=self._FIELDS_STATIC,
3343
                       dynamic=self._FIELDS_DYNAMIC,
3344
                       selected=self.op.output_fields)
3345

    
3346
  def ExpandNames(self):
3347
    # Lock all nodes, in shared mode
3348
    # Temporary removal of locks, should be reverted later
3349
    # TODO: reintroduce locks when they are lighter-weight
3350
    self.needed_locks = {}
3351
    #self.share_locks[locking.LEVEL_NODE] = 1
3352
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3353

    
3354
  @staticmethod
3355
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
3357

3358
    @param rlist: a map with node names as keys and OS objects as values
3359

3360
    @rtype: dict
3361
    @return: a dictionary with osnames as keys and as value another
3362
        map, with nodes as keys and tuples of (path, status, diagnose,
3363
        variants, parameters, api_versions) as values, eg::
3364

3365
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3366
                                     (/srv/..., False, "invalid api")],
3367
                           "node2": [(/srv/..., True, "", [], [])]}
3368
          }
3369

3370
    """
3371
    all_os = {}
3372
    # we build here the list of nodes that didn't fail the RPC (at RPC
3373
    # level), so that nodes with a non-responding node daemon don't
3374
    # make all OSes invalid
3375
    good_nodes = [node_name for node_name in rlist
3376
                  if not rlist[node_name].fail_msg]
3377
    for node_name, nr in rlist.items():
3378
      if nr.fail_msg or not nr.payload:
3379
        continue
3380
      for (name, path, status, diagnose, variants,
3381
           params, api_versions) in nr.payload:
3382
        if name not in all_os:
3383
          # build a list of nodes for this os containing empty lists
3384
          # for each node in node_list
3385
          all_os[name] = {}
3386
          for nname in good_nodes:
3387
            all_os[name][nname] = []
3388
        # convert params from [name, help] to (name, help)
3389
        params = [tuple(v) for v in params]
3390
        all_os[name][node_name].append((path, status, diagnose,
3391
                                        variants, params, api_versions))
3392
    return all_os
3393

    
3394
  def Exec(self, feedback_fn):
3395
    """Compute the list of OSes.
3396

3397
    """
3398
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3399
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3400
    pol = self._DiagnoseByOS(node_data)
3401
    output = []
3402
    cluster = self.cfg.GetClusterInfo()
3403

    
3404
    for os_name in utils.NiceSort(pol.keys()):
3405
      os_data = pol[os_name]
3406
      row = []
3407
      valid = True
3408
      (variants, params, api_versions) = null_state = (set(), set(), set())
3409
      for idx, osl in enumerate(os_data.values()):
3410
        valid = bool(valid and osl and osl[0][1])
3411
        if not valid:
3412
          (variants, params, api_versions) = null_state
3413
          break
3414
        node_variants, node_params, node_api = osl[0][3:6]
3415
        if idx == 0: # first entry
3416
          variants = set(node_variants)
3417
          params = set(node_params)
3418
          api_versions = set(node_api)
3419
        else: # keep consistency
3420
          variants.intersection_update(node_variants)
3421
          params.intersection_update(node_params)
3422
          api_versions.intersection_update(node_api)
3423

    
3424
      is_hid = os_name in cluster.hidden_os
3425
      is_blk = os_name in cluster.blacklisted_os
3426
      if ((self._HID not in self.op.output_fields and is_hid) or
3427
          (self._BLK not in self.op.output_fields and is_blk) or
3428
          (self._VLD not in self.op.output_fields and not valid)):
3429
        continue
3430

    
3431
      for field in self.op.output_fields:
3432
        if field == "name":
3433
          val = os_name
3434
        elif field == self._VLD:
3435
          val = valid
3436
        elif field == "node_status":
3437
          # this is just a copy of the dict
3438
          val = {}
3439
          for node_name, nos_list in os_data.items():
3440
            val[node_name] = nos_list
3441
        elif field == "variants":
3442
          val = utils.NiceSort(list(variants))
3443
        elif field == "parameters":
3444
          val = list(params)
3445
        elif field == "api_versions":
3446
          val = list(api_versions)
3447
        elif field == self._HID:
3448
          val = is_hid
3449
        elif field == self._BLK:
3450
          val = is_blk
3451
        else:
3452
          raise errors.ParameterError(field)
3453
        row.append(val)
3454
      output.append(row)
3455

    
3456
    return output
3457

    
3458

    
3459
class LUNodeRemove(LogicalUnit):
3460
  """Logical unit for removing a node.
3461

3462
  """
3463
  HPATH = "node-remove"
3464
  HTYPE = constants.HTYPE_NODE
3465

    
3466
  def BuildHooksEnv(self):
3467
    """Build hooks env.
3468

3469
    This doesn't run on the target node in the pre phase as a failed
3470
    node would then be impossible to remove.
3471

3472
    """
3473
    env = {
3474
      "OP_TARGET": self.op.node_name,
3475
      "NODE_NAME": self.op.node_name,
3476
      }
3477
    all_nodes = self.cfg.GetNodeList()
3478
    try:
3479
      all_nodes.remove(self.op.node_name)
3480
    except ValueError:
3481
      logging.warning("Node %s which is about to be removed was not found"
                      " in the list of all nodes", self.op.node_name)
3483
    return env, all_nodes, all_nodes
3484

    
3485
  def CheckPrereq(self):
3486
    """Check prerequisites.
3487

3488
    This checks:
3489
     - the node exists in the configuration
3490
     - it does not have primary or secondary instances
3491
     - it's not the master
3492

3493
    Any errors are signaled by raising errors.OpPrereqError.
3494

3495
    """
3496
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3497
    node = self.cfg.GetNodeInfo(self.op.node_name)
3498
    assert node is not None
3499

    
3500
    instance_list = self.cfg.GetInstanceList()
3501

    
3502
    masternode = self.cfg.GetMasterNode()
3503
    if node.name == masternode:
3504
      raise errors.OpPrereqError("Node is the master node,"
3505
                                 " you need to failover first.",
3506
                                 errors.ECODE_INVAL)
3507

    
3508
    for instance_name in instance_list:
3509
      instance = self.cfg.GetInstanceInfo(instance_name)
3510
      if node.name in instance.all_nodes:
3511
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3512
                                   " please remove first." % instance_name,
3513
                                   errors.ECODE_INVAL)
3514
    self.op.node_name = node.name
3515
    self.node = node
3516

    
3517
  def Exec(self, feedback_fn):
3518
    """Removes the node from the cluster.
3519

3520
    """
3521
    node = self.node
3522
    logging.info("Stopping the node daemon and removing configs from node %s",
3523
                 node.name)
3524

    
3525
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3526

    
3527
    # Promote nodes to master candidate as needed
3528
    _AdjustCandidatePool(self, exceptions=[node.name])
3529
    self.context.RemoveNode(node.name)
3530

    
3531
    # Run post hooks on the node before it's removed
3532
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3533
    try:
3534
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3535
    except:
3536
      # pylint: disable-msg=W0702
3537
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3538

    
3539
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3540
    msg = result.fail_msg
3541
    if msg:
3542
      self.LogWarning("Errors encountered on the remote node while leaving"
3543
                      " the cluster: %s", msg)
3544

    
3545
    # Remove node from our /etc/hosts
3546
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3547
      master_node = self.cfg.GetMasterNode()
3548
      result = self.rpc.call_etc_hosts_modify(master_node,
3549
                                              constants.ETC_HOSTS_REMOVE,
3550
                                              node.name, None)
3551
      result.Raise("Can't update hosts file with new host data")
3552
      _RedistributeAncillaryFiles(self)
3553

    
3554

    
3555
class _NodeQuery(_QueryBase):
3556
  FIELDS = query.NODE_FIELDS
3557

    
3558
  def ExpandNames(self, lu):
3559
    lu.needed_locks = {}
3560
    lu.share_locks[locking.LEVEL_NODE] = 1
3561

    
3562
    if self.names:
3563
      self.wanted = _GetWantedNodes(lu, self.names)
3564
    else:
3565
      self.wanted = locking.ALL_SET
3566

    
3567
    self.do_locking = (self.use_locking and
3568
                       query.NQ_LIVE in self.requested_data)
3569

    
3570
    if self.do_locking:
3571
      # if we don't request only static fields, we need to lock the nodes
3572
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3573

    
3574
  def DeclareLocks(self, lu, level):
3575
    pass
3576

    
3577
  def _GetQueryData(self, lu):
3578
    """Computes the list of nodes and their attributes.
3579

3580
    """
3581
    all_info = lu.cfg.GetAllNodesInfo()
3582

    
3583
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3584

    
3585
    # Gather data as requested
3586
    if query.NQ_LIVE in self.requested_data:
3587
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3588
                                        lu.cfg.GetHypervisorType())
3589
      live_data = dict((name, nresult.payload)
3590
                       for (name, nresult) in node_data.items()
3591
                       if not nresult.fail_msg and nresult.payload)
3592
    else:
3593
      live_data = None
3594

    
3595
    if query.NQ_INST in self.requested_data:
3596
      node_to_primary = dict([(name, set()) for name in nodenames])
3597
      node_to_secondary = dict([(name, set()) for name in nodenames])
3598

    
3599
      inst_data = lu.cfg.GetAllInstancesInfo()
3600

    
3601
      for inst in inst_data.values():
3602
        if inst.primary_node in node_to_primary:
3603
          node_to_primary[inst.primary_node].add(inst.name)
3604
        for secnode in inst.secondary_nodes:
3605
          if secnode in node_to_secondary:
3606
            node_to_secondary[secnode].add(inst.name)
3607
    else:
3608
      node_to_primary = None
3609
      node_to_secondary = None
3610

    
3611
    if query.NQ_OOB in self.requested_data:
3612
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3613
                         for name, node in all_info.iteritems())
3614
    else:
3615
      oob_support = None
3616

    
3617
    if query.NQ_GROUP in self.requested_data:
3618
      groups = lu.cfg.GetAllNodeGroupsInfo()
3619
    else:
3620
      groups = {}
3621

    
3622
    return query.NodeQueryData([all_info[name] for name in nodenames],
3623
                               live_data, lu.cfg.GetMasterNode(),
3624
                               node_to_primary, node_to_secondary, groups,
3625
                               oob_support, lu.cfg.GetClusterInfo())
3626

    
3627

    
3628
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
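
# Note: LUNodeQuery is a thin wrapper around the generic query machinery --
# field selection, locking decisions and data gathering all live in _NodeQuery
# above; OldStyleQuery renders the result in the older row-list format, while
# LUQuery below uses NewStyleQuery for the richer result structure.

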
class LUNodeQueryvols(NoHooksLU):
3647
  """Logical unit for getting volumes on node(s).
3648

3649
  """
3650
  REQ_BGL = False
3651
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3652
  _FIELDS_STATIC = utils.FieldSet("node")
3653

    
3654
  def CheckArguments(self):
3655
    _CheckOutputFields(static=self._FIELDS_STATIC,
3656
                       dynamic=self._FIELDS_DYNAMIC,
3657
                       selected=self.op.output_fields)
3658

    
3659
  def ExpandNames(self):
3660
    self.needed_locks = {}
3661
    self.share_locks[locking.LEVEL_NODE] = 1
3662
    if not self.op.nodes:
3663
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3664
    else:
3665
      self.needed_locks[locking.LEVEL_NODE] = \
3666
        _GetWantedNodes(self, self.op.nodes)
3667

    
3668
  def Exec(self, feedback_fn):
3669
    """Computes the list of nodes and their attributes.
3670

3671
    """
3672
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3673
    volumes = self.rpc.call_node_volumes(nodenames)
3674

    
3675
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3676
             in self.cfg.GetInstanceList()]
3677

    
3678
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3679

    
3680
    output = []
3681
    for node in nodenames:
3682
      nresult = volumes[node]
3683
      if nresult.offline:
3684
        continue
3685
      msg = nresult.fail_msg
3686
      if msg:
3687
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3688
        continue
3689

    
3690
      node_vols = nresult.payload[:]
3691
      node_vols.sort(key=lambda vol: vol['dev'])
3692

    
3693
      for vol in node_vols:
3694
        node_output = []
3695
        for field in self.op.output_fields:
3696
          if field == "node":
3697
            val = node
3698
          elif field == "phys":
3699
            val = vol['dev']
3700
          elif field == "vg":
3701
            val = vol['vg']
3702
          elif field == "name":
3703
            val = vol['name']
3704
          elif field == "size":
3705
            val = int(float(vol['size']))
3706
          elif field == "instance":
3707
            for inst in ilist:
3708
              if node not in lv_by_node[inst]:
3709
                continue
3710
              if vol['name'] in lv_by_node[inst][node]:
3711
                val = inst.name
3712
                break
3713
            else:
3714
              val = '-'
3715
          else:
3716
            raise errors.ParameterError(field)
3717
          node_output.append(str(val))
3718

    
3719
        output.append(node_output)
3720

    
3721
    return output
3722

    
3723

    
3724
class LUNodeQueryStorage(NoHooksLU):
3725
  """Logical unit for getting information on storage units on node(s).
3726

3727
  """
3728
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3729
  REQ_BGL = False
3730

    
3731
  def CheckArguments(self):
3732
    _CheckOutputFields(static=self._FIELDS_STATIC,
3733
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3734
                       selected=self.op.output_fields)
3735

    
3736
  def ExpandNames(self):
3737
    self.needed_locks = {}
3738
    self.share_locks[locking.LEVEL_NODE] = 1
3739

    
3740
    if self.op.nodes:
3741
      self.needed_locks[locking.LEVEL_NODE] = \
3742
        _GetWantedNodes(self, self.op.nodes)
3743
    else:
3744
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3745

    
3746
  def Exec(self, feedback_fn):
3747
    """Computes the list of nodes and their attributes.
3748

3749
    """
3750
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3751

    
3752
    # Always get name to sort by
3753
    if constants.SF_NAME in self.op.output_fields:
3754
      fields = self.op.output_fields[:]
3755
    else:
3756
      fields = [constants.SF_NAME] + self.op.output_fields
3757

    
3758
    # Never ask for node or type as it's only known to the LU
3759
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3760
      while extra in fields:
3761
        fields.remove(extra)
3762

    
3763
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3764
    name_idx = field_idx[constants.SF_NAME]
3765

    
3766
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3767
    data = self.rpc.call_storage_list(self.nodes,
3768
                                      self.op.storage_type, st_args,
3769
                                      self.op.name, fields)
3770

    
3771
    result = []
3772

    
3773
    for node in utils.NiceSort(self.nodes):
3774
      nresult = data[node]
3775
      if nresult.offline:
3776
        continue
3777

    
3778
      msg = nresult.fail_msg
3779
      if msg:
3780
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3781
        continue
3782

    
3783
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3784

    
3785
      for name in utils.NiceSort(rows.keys()):
3786
        row = rows[name]
3787

    
3788
        out = []
3789

    
3790
        for field in self.op.output_fields:
3791
          if field == constants.SF_NODE:
3792
            val = node
3793
          elif field == constants.SF_TYPE:
3794
            val = self.op.storage_type
3795
          elif field in field_idx:
3796
            val = row[field_idx[field]]
3797
          else:
3798
            raise errors.ParameterError(field)
3799

    
3800
          out.append(val)
3801

    
3802
        result.append(out)
3803

    
3804
    return result
3805

    
3806

    
3807
class _InstanceQuery(_QueryBase):
3808
  FIELDS = query.INSTANCE_FIELDS
3809

    
3810
  def ExpandNames(self, lu):
3811
    lu.needed_locks = {}
3812
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
3813
    lu.share_locks[locking.LEVEL_NODE] = 1
3814

    
3815
    if self.names:
3816
      self.wanted = _GetWantedInstances(lu, self.names)
3817
    else:
3818
      self.wanted = locking.ALL_SET
3819

    
3820
    self.do_locking = (self.use_locking and
3821
                       query.IQ_LIVE in self.requested_data)
3822
    if self.do_locking:
3823
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3824
      lu.needed_locks[locking.LEVEL_NODE] = []
3825
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3826

    
3827
  def DeclareLocks(self, lu, level):
3828
    if level == locking.LEVEL_NODE and self.do_locking:
3829
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
3830

    
3831
  def _GetQueryData(self, lu):
3832
    """Computes the list of instances and their attributes.
3833

3834
    """
3835
    all_info = lu.cfg.GetAllInstancesInfo()
3836

    
3837
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3838

    
3839
    instance_list = [all_info[name] for name in instance_names]
3840
    nodes = frozenset([inst.primary_node for inst in instance_list])
3841
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3842
    bad_nodes = []
3843
    offline_nodes = []
3844

    
3845
    # Gather data as requested
3846
    if query.IQ_LIVE in self.requested_data:
3847
      live_data = {}
3848
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3849
      for name in nodes:
3850
        result = node_data[name]
3851
        if result.offline:
3852
          # offline nodes will be in both lists
3853
          assert result.fail_msg
3854
          offline_nodes.append(name)
3855
        if result.fail_msg:
3856
          bad_nodes.append(name)
3857
        elif result.payload:
3858
          live_data.update(result.payload)
3859
        # else no instance is alive
3860
    else:
3861
      live_data = {}
3862

    
3863
    if query.IQ_DISKUSAGE in self.requested_data:
3864
      disk_usage = dict((inst.name,
3865
                         _ComputeDiskSize(inst.disk_template,
3866
                                          [{"size": disk.size}
3867
                                           for disk in inst.disks]))
3868
                        for inst in instance_list)
3869
    else:
3870
      disk_usage = None
3871

    
3872
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3873
                                   disk_usage, offline_nodes, bad_nodes,
3874
                                   live_data)
3875

    
3876

    
3877
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)
    names = qlang.ReadSimpleFilter("name", self.op.filter)

    self.impl = qcls(names, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
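
# Dispatch sketch: _GetQueryImplementation maps the requested resource kind in
# self.op.what to one of the _QueryBase subclasses defined in this module (for
# example _NodeQuery or _InstanceQuery above); the LU itself merely forwards
# ExpandNames/DeclareLocks/Exec to that implementation.

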
class LUQueryFields(NoHooksLU):
  """Query the available fields for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return self.qcls.FieldsQuery(self.op.fields)
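
# Note: listing fields needs no cluster data, which is why ExpandNames
# acquires no locks and FieldsQuery is invoked on the implementation class
# itself rather than on an instance.

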
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
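
# Usage sketch (field names are an assumption, not taken from this module):
# for an LVM physical volume one would typically toggle allocatability with
# something like
#   changes = {constants.SF_ALLOCATABLE: False}
# assuming SF_ALLOCATABLE is among MODIFIABLE_STORAGE_FIELDS for that storage
# type; any key outside that set is rejected in CheckArguments above.

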
class LUNodeAdd(LogicalUnit):
3960
  """Logical unit for adding node to the cluster.
3961

3962
  """
3963
  HPATH = "node-add"
3964
  HTYPE = constants.HTYPE_NODE
3965
  _NFLAGS = ["master_capable", "vm_capable"]
3966

    
3967
  def CheckArguments(self):
3968
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3969
    # validate/normalize the node name
3970
    self.hostname = netutils.GetHostname(name=self.op.node_name,
3971
                                         family=self.primary_ip_family)
3972
    self.op.node_name = self.hostname.name
3973
    if self.op.readd and self.op.group:
3974
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
3975
                                 " being readded", errors.ECODE_INVAL)
3976

    
3977
  def BuildHooksEnv(self):
3978
    """Build hooks env.
3979

3980
    This will run on all nodes before, and on all nodes + the new node after.
3981

3982
    """
3983
    env = {
3984
      "OP_TARGET": self.op.node_name,
3985
      "NODE_NAME": self.op.node_name,
3986
      "NODE_PIP": self.op.primary_ip,
3987
      "NODE_SIP": self.op.secondary_ip,
3988
      "MASTER_CAPABLE": str(self.op.master_capable),
3989
      "VM_CAPABLE": str(self.op.vm_capable),
3990
      }
3991
    nodes_0 = self.cfg.GetNodeList()
3992
    nodes_1 = nodes_0 + [self.op.node_name, ]
3993
    return env, nodes_0, nodes_1
3994

    
3995
  def CheckPrereq(self):
3996
    """Check prerequisites.
3997

3998
    This checks:
3999
     - the new node is not already in the config
4000
     - it is resolvable
4001
     - its parameters (single/dual homed) matches the cluster
4002

4003
    Any errors are signaled by raising errors.OpPrereqError.
4004

4005
    """
4006
    cfg = self.cfg
4007
    hostname = self.hostname
4008
    node = hostname.name
4009
    primary_ip = self.op.primary_ip = hostname.ip
4010
    if self.op.secondary_ip is None:
4011
      if self.primary_ip_family == netutils.IP6Address.family:
4012
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4013
                                   " IPv4 address must be given as secondary",
4014
                                   errors.ECODE_INVAL)
4015
      self.op.secondary_ip = primary_ip
4016

    
4017
    secondary_ip = self.op.secondary_ip
4018
    if not netutils.IP4Address.IsValid(secondary_ip):
4019
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4020
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4021

    
4022
    node_list = cfg.GetNodeList()
4023
    if not self.op.readd and node in node_list:
4024
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4025
                                 node, errors.ECODE_EXISTS)
4026
    elif self.op.readd and node not in node_list:
4027
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4028
                                 errors.ECODE_NOENT)
4029

    
4030
    self.changed_primary_ip = False
4031

    
4032
    for existing_node_name in node_list:
4033
      existing_node = cfg.GetNodeInfo(existing_node_name)
4034

    
4035
      if self.op.readd and node == existing_node_name:
4036
        if existing_node.secondary_ip != secondary_ip:
4037
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4038
                                     " address configuration as before",
4039
                                     errors.ECODE_INVAL)
4040
        if existing_node.primary_ip != primary_ip:
4041
          self.changed_primary_ip = True
4042

    
4043
        continue
4044

    
4045
      if (existing_node.primary_ip == primary_ip or
4046
          existing_node.secondary_ip == primary_ip or
4047
          existing_node.primary_ip == secondary_ip or
4048
          existing_node.secondary_ip == secondary_ip):
4049
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4050
                                   " existing node %s" % existing_node.name,
4051
                                   errors.ECODE_NOTUNIQUE)
4052

    
4053
    # After this 'if' block, None is no longer a valid value for the
4054
    # _capable op attributes
4055
    if self.op.readd:
4056
      old_node = self.cfg.GetNodeInfo(node)
4057
      assert old_node is not None, "Can't retrieve locked node %s" % node
4058
      for attr in self._NFLAGS:
4059
        if getattr(self.op, attr) is None:
4060
          setattr(self.op, attr, getattr(old_node, attr))
4061
    else:
4062
      for attr in self._NFLAGS:
4063
        if getattr(self.op, attr) is None:
4064
          setattr(self.op, attr, True)
4065

    
4066
    if self.op.readd and not self.op.vm_capable:
4067
      pri, sec = cfg.GetNodeInstances(node)
4068
      if pri or sec:
4069
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4070
                                   " flag set to false, but it already holds"
4071
                                   " instances" % node,
4072
                                   errors.ECODE_STATE)
4073

    
4074
    # check that the type of the node (single versus dual homed) is the
4075
    # same as for the master
4076
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4077
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4078
    newbie_singlehomed = secondary_ip == primary_ip
4079
    if master_singlehomed != newbie_singlehomed:
4080
      if master_singlehomed:
4081
        raise errors.OpPrereqError("The master has no secondary ip but the"
4082
                                   " new node has one",
4083
                                   errors.ECODE_INVAL)
4084
      else:
4085
        raise errors.OpPrereqError("The master has a secondary ip but the"
4086
                                   " new node doesn't have one",
4087
                                   errors.ECODE_INVAL)
4088

    
4089
    # checks reachability
4090
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4091
      raise errors.OpPrereqError("Node not reachable by ping",
4092
                                 errors.ECODE_ENVIRON)
4093

    
4094
    if not newbie_singlehomed:
4095
      # check reachability from my secondary ip to newbie's secondary ip
4096
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4097
                           source=myself.secondary_ip):
4098
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4099
                                   " based ping to node daemon port",
4100
                                   errors.ECODE_ENVIRON)
4101

    
4102
    if self.op.readd:
4103
      exceptions = [node]
4104
    else:
4105
      exceptions = []
4106

    
4107
    if self.op.master_capable:
4108
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4109
    else:
4110
      self.master_candidate = False
4111

    
4112
    if self.op.readd:
4113
      self.new_node = old_node
4114
    else:
4115
      node_group = cfg.LookupNodeGroup(self.op.group)
4116
      self.new_node = objects.Node(name=node,
4117
                                   primary_ip=primary_ip,
4118
                                   secondary_ip=secondary_ip,
4119
                                   master_candidate=self.master_candidate,
4120
                                   offline=False, drained=False,
4121
                                   group=node_group)
4122

    
4123
    if self.op.ndparams:
4124
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4125

    
4126
  def Exec(self, feedback_fn):
4127
    """Adds the new node to the cluster.
4128

4129
    """
4130
    new_node = self.new_node
4131
    node = new_node.name
4132

    
4133
    # We are adding a new node, so we assume it's powered
4134
    new_node.powered = True
4135

    
4136
    # for re-adds, reset the offline/drained/master-candidate flags;
4137
    # we need to reset here, otherwise offline would prevent RPC calls
4138
    # later in the procedure; this also means that if the re-add
4139
    # fails, we are left with a non-offlined, broken node
4140
    if self.op.readd:
4141
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4142
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4143
      # if we demote the node, we do cleanup later in the procedure
4144
      new_node.master_candidate = self.master_candidate
4145
      if self.changed_primary_ip:
4146
        new_node.primary_ip = self.op.primary_ip
4147

    
4148
    # copy the master/vm_capable flags
4149
    for attr in self._NFLAGS:
4150
      setattr(new_node, attr, getattr(self.op, attr))
4151

    
4152
    # notify the user about any possible mc promotion
4153
    if new_node.master_candidate:
4154
      self.LogInfo("Node will be a master candidate")
4155

    
4156
    if self.op.ndparams:
4157
      new_node.ndparams = self.op.ndparams
4158
    else:
4159
      new_node.ndparams = {}
4160

    
4161
    # check connectivity
4162
    result = self.rpc.call_version([node])[node]
4163
    result.Raise("Can't get version information from node %s" % node)
4164
    if constants.PROTOCOL_VERSION == result.payload:
4165
      logging.info("Communication to node %s fine, sw version %s match",
4166
                   node, result.payload)
4167
    else:
4168
      raise errors.OpExecError("Version mismatch master version %s,"
4169
                               " node version %s" %
4170
                               (constants.PROTOCOL_VERSION, result.payload))
4171

    
4172
    # Add node to our /etc/hosts, and add key to known_hosts
4173
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4174
      master_node = self.cfg.GetMasterNode()
4175
      result = self.rpc.call_etc_hosts_modify(master_node,
4176
                                              constants.ETC_HOSTS_ADD,
4177
                                              self.hostname.name,
4178
                                              self.hostname.ip)
4179
      result.Raise("Can't update hosts file with new host data")
4180

    
4181
    if new_node.secondary_ip != new_node.primary_ip:
4182
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4183
                               False)
4184

    
4185
    node_verify_list = [self.cfg.GetMasterNode()]
4186
    node_verify_param = {
4187
      constants.NV_NODELIST: [node],
4188
      # TODO: do a node-net-test as well?
4189
    }
4190

    
4191
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4192
                                       self.cfg.GetClusterName())
4193
    for verifier in node_verify_list:
4194
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4195
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4196
      if nl_payload:
4197
        for failed in nl_payload:
4198
          feedback_fn("ssh/hostname verification failed"
4199
                      " (checking from %s): %s" %
4200
                      (verifier, nl_payload[failed]))
4201
        raise errors.OpExecError("ssh/hostname verification failed.")
4202

    
4203
    if self.op.readd:
4204
      _RedistributeAncillaryFiles(self)
4205
      self.context.ReaddNode(new_node)
4206
      # make sure we redistribute the config
4207
      self.cfg.Update(new_node, feedback_fn)
4208
      # and make sure the new node will not have old files around
4209
      if not new_node.master_candidate:
4210
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4211
        msg = result.fail_msg
4212
        if msg:
4213
          self.LogWarning("Node failed to demote itself from master"
4214
                          " candidate status: %s" % msg)
4215
    else:
4216
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4217
                                  additional_vm=self.op.vm_capable)
4218
      self.context.AddNode(new_node, self.proc.GetECId())
4219

    
4220

    
4221
class LUNodeSetParams(LogicalUnit):
4222
  """Modifies the parameters of a node.
4223

4224
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4225
      to the node role (as _ROLE_*)
4226
  @cvar _R2F: a dictionary from node role to tuples of flags
4227
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4228

4229
  """
4230
  HPATH = "node-modify"
4231
  HTYPE = constants.HTYPE_NODE
4232
  REQ_BGL = False
4233
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4234
  _F2R = {
4235
    (True, False, False): _ROLE_CANDIDATE,
4236
    (False, True, False): _ROLE_DRAINED,
4237
    (False, False, True): _ROLE_OFFLINE,
4238
    (False, False, False): _ROLE_REGULAR,
4239
    }
4240
  _R2F = dict((v, k) for k, v in _F2R.items())
4241
  _FLAGS = ["master_candidate", "drained", "offline"]
4242

    
4243
  def CheckArguments(self):
4244
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4245
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4246
                self.op.master_capable, self.op.vm_capable,
4247
                self.op.secondary_ip, self.op.ndparams]
4248
    if all_mods.count(None) == len(all_mods):
4249
      raise errors.OpPrereqError("Please pass at least one modification",
4250
                                 errors.ECODE_INVAL)
4251
    if all_mods.count(True) > 1:
4252
      raise errors.OpPrereqError("Can't set the node into more than one"
4253
                                 " state at the same time",
4254
                                 errors.ECODE_INVAL)
4255

    
4256
    # Boolean value that tells us whether we might be demoting from MC
4257
    self.might_demote = (self.op.master_candidate == False or
4258
                         self.op.offline == True or
4259
                         self.op.drained == True or
4260
                         self.op.master_capable == False)
4261

    
4262
    if self.op.secondary_ip:
4263
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4264
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4265
                                   " address" % self.op.secondary_ip,
4266
                                   errors.ECODE_INVAL)
4267

    
4268
    self.lock_all = self.op.auto_promote and self.might_demote
4269
    self.lock_instances = self.op.secondary_ip is not None
4270

    
4271
  def ExpandNames(self):
4272
    if self.lock_all:
4273
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4274
    else:
4275
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4276

    
4277
    if self.lock_instances:
4278
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4279

    
4280
  def DeclareLocks(self, level):
4281
    # If we have locked all instances, before waiting to lock nodes, release
4282
    # all the ones living on nodes unrelated to the current operation.
4283
    if level == locking.LEVEL_NODE and self.lock_instances:
4284
      instances_release = []
4285
      instances_keep = []
4286
      self.affected_instances = []
4287
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4288
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4289
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4290
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4291
          if i_mirrored and self.op.node_name in instance.all_nodes:
4292
            instances_keep.append(instance_name)
4293
            self.affected_instances.append(instance)
4294
          else:
4295
            instances_release.append(instance_name)
4296
        if instances_release:
4297
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4298
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4299

    
4300
  def BuildHooksEnv(self):
4301
    """Build hooks env.
4302

4303
    This runs on the master node.
4304

4305
    """
4306
    env = {
4307
      "OP_TARGET": self.op.node_name,
4308
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4309
      "OFFLINE": str(self.op.offline),
4310
      "DRAINED": str(self.op.drained),
4311
      "MASTER_CAPABLE": str(self.op.master_capable),
4312
      "VM_CAPABLE": str(self.op.vm_capable),
4313
      }
4314
    nl = [self.cfg.GetMasterNode(),
4315
          self.op.node_name]
4316
    return env, nl, nl
4317

    
4318
  def CheckPrereq(self):
4319
    """Check prerequisites.
4320

4321
    This only checks the instance list against the existing names.
4322

4323
    """
4324
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4325

    
4326
    if (self.op.master_candidate is not None or
4327
        self.op.drained is not None or
4328
        self.op.offline is not None):
4329
      # we can't change the master's node flags
4330
      if self.op.node_name == self.cfg.GetMasterNode():
4331
        raise errors.OpPrereqError("The master role can be changed"
4332
                                   " only via master-failover",
4333
                                   errors.ECODE_INVAL)
4334

    
4335
    if self.op.master_candidate and not node.master_capable:
4336
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4337
                                 " it a master candidate" % node.name,
4338
                                 errors.ECODE_STATE)
4339

    
4340
    if self.op.vm_capable == False:
4341
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4342
      if ipri or isec:
4343
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4344
                                   " the vm_capable flag" % node.name,
4345
                                   errors.ECODE_STATE)
4346

    
4347
    if node.master_candidate and self.might_demote and not self.lock_all:
4348
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
4349
      # check if after removing the current node, we're missing master
4350
      # candidates
4351
      (mc_remaining, mc_should, _) = \
4352
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4353
      if mc_remaining < mc_should:
4354
        raise errors.OpPrereqError("Not enough master candidates, please"
4355
                                   " pass auto_promote to allow promotion",
4356
                                   errors.ECODE_STATE)
4357

    
4358
    self.old_flags = old_flags = (node.master_candidate,
4359
                                  node.drained, node.offline)
4360
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4361
    self.old_role = old_role = self._F2R[old_flags]
4362

    
4363
    # Check for ineffective changes
4364
    for attr in self._FLAGS:
4365
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4366
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4367
        setattr(self.op, attr, None)
4368

    
4369
    # Past this point, any flag change to False means a transition
4370
    # away from the respective state, as only real changes are kept
4371

    
4372
    # TODO: We might query the real power state if it supports OOB
4373
    if _SupportsOob(self.cfg, node):
4374
      if self.op.offline is False and not (node.powered or
4375
                                           self.op.powered == True):
4376
        raise errors.OpPrereqError(("Please power on node %s first before you"
4377
                                    " can reset offline state") %
4378
                                   self.op.node_name)
4379
    elif self.op.powered is not None:
4380
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4381
                                  " which does not support out-of-band"
4382
                                  " handling") % self.op.node_name)
4383

    
4384
    # If we're being deofflined/drained, we'll MC ourself if needed
4385
    if (self.op.drained == False or self.op.offline == False or
4386
        (self.op.master_capable and not node.master_capable)):
4387
      if _DecideSelfPromotion(self):
4388
        self.op.master_candidate = True
4389
        self.LogInfo("Auto-promoting node to master candidate")
4390

    
4391
    # If we're no longer master capable, we'll demote ourselves from MC
4392
    if self.op.master_capable == False and node.master_candidate:
4393
      self.LogInfo("Demoting from master candidate")
4394
      self.op.master_candidate = False
4395

    
4396
    # Compute new role
4397
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4398
    if self.op.master_candidate:
4399
      new_role = self._ROLE_CANDIDATE
4400
    elif self.op.drained:
4401
      new_role = self._ROLE_DRAINED
4402
    elif self.op.offline:
4403
      new_role = self._ROLE_OFFLINE
4404
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4405
      # False is still in new flags, which means we're un-setting (the
4406
      # only) True flag
4407
      new_role = self._ROLE_REGULAR
4408
    else: # no new flags, nothing, keep old role
4409
      new_role = old_role
4410

    
4411
    self.new_role = new_role
4412

    
4413
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4414
      # Trying to transition out of offline status
4415
      result = self.rpc.call_version([node.name])[node.name]
4416
      if result.fail_msg:
4417
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4418
                                   " to report its version: %s" %
4419
                                   (node.name, result.fail_msg),
4420
                                   errors.ECODE_STATE)
4421
      else:
4422
        self.LogWarning("Transitioning node from offline to online state"
4423
                        " without using re-add. Please make sure the node"
4424
                        " is healthy!")
4425

    
4426
    if self.op.secondary_ip:
4427
      # Ok even without locking, because this can't be changed by any LU
4428
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4429
      master_singlehomed = master.secondary_ip == master.primary_ip
4430
      if master_singlehomed and self.op.secondary_ip:
4431
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4432
                                   " homed cluster", errors.ECODE_INVAL)
4433

    
4434
      if node.offline:
4435
        if self.affected_instances:
4436
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4437
                                     " node has instances (%s) configured"
4438
                                     " to use it" % self.affected_instances)
4439
      else:
4440
        # On online nodes, check that no instances are running, and that
4441
        # the node has the new ip and we can reach it.
4442
        for instance in self.affected_instances:
4443
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4444

    
4445
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4446
        if master.name != node.name:
4447
          # check reachability from master secondary ip to new secondary ip
4448
          if not netutils.TcpPing(self.op.secondary_ip,
4449
                                  constants.DEFAULT_NODED_PORT,
4450
                                  source=master.secondary_ip):
4451
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4452
                                       " based ping to node daemon port",
4453
                                       errors.ECODE_ENVIRON)
4454

    
4455
    if self.op.ndparams:
4456
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4457
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4458
      self.new_ndparams = new_ndparams
4459

    
4460
  def Exec(self, feedback_fn):
4461
    """Modifies a node.
4462

4463
    """
4464
    node = self.node
4465
    old_role = self.old_role
4466
    new_role = self.new_role
4467

    
4468
    result = []
4469

    
4470
    if self.op.ndparams:
4471
      node.ndparams = self.new_ndparams
4472

    
4473
    if self.op.powered is not None:
4474
      node.powered = self.op.powered
4475

    
4476
    for attr in ["master_capable", "vm_capable"]:
4477
      val = getattr(self.op, attr)
4478
      if val is not None:
4479
        setattr(node, attr, val)
4480
        result.append((attr, str(val)))
4481

    
4482
    if new_role != old_role:
4483
      # Tell the node to demote itself, if no longer MC and not offline
4484
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4485
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4486
        if msg:
4487
          self.LogWarning("Node failed to demote itself: %s", msg)
4488

    
4489
      new_flags = self._R2F[new_role]
4490
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4491
        if of != nf:
4492
          result.append((desc, str(nf)))
4493
      (node.master_candidate, node.drained, node.offline) = new_flags
4494

    
4495
      # we locked all nodes, we adjust the CP before updating this node
4496
      if self.lock_all:
4497
        _AdjustCandidatePool(self, [node.name])
4498

    
4499
    if self.op.secondary_ip:
4500
      node.secondary_ip = self.op.secondary_ip
4501
      result.append(("secondary_ip", self.op.secondary_ip))
4502

    
4503
    # this will trigger configuration file update, if needed
4504
    self.cfg.Update(node, feedback_fn)
4505

    
4506
    # this will trigger job queue propagation or cleanup if the mc
4507
    # flag changed
4508
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4509
      self.context.ReaddNode(node)
4510

    
4511
    return result
4512

    
4513

    
4514
class LUNodePowercycle(NoHooksLU):
4515
  """Powercycles a node.
4516

4517
  """
4518
  REQ_BGL = False
4519

    
4520
  def CheckArguments(self):
4521
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4522
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4523
      raise errors.OpPrereqError("The node is the master and the force"
4524
                                 " parameter was not set",
4525
                                 errors.ECODE_INVAL)
4526

    
4527
  def ExpandNames(self):
4528
    """Locking for PowercycleNode.
4529

4530
    This is a last-resort option and shouldn't block on other
4531
    jobs. Therefore, we grab no locks.
4532

4533
    """
4534
    self.needed_locks = {}
4535

    
4536
  def Exec(self, feedback_fn):
4537
    """Reboots a node.
4538

4539
    """
4540
    result = self.rpc.call_node_powercycle(self.op.node_name,
4541
                                           self.cfg.GetHypervisorType())
4542
    result.Raise("Failed to schedule the reboot")
4543
    return result.payload
4544

    
4545

    
4546
class LUClusterQuery(NoHooksLU):
4547
  """Query cluster configuration.
4548

4549
  """
4550
  REQ_BGL = False
4551

    
4552
  def ExpandNames(self):
4553
    self.needed_locks = {}
4554

    
4555
  def Exec(self, feedback_fn):
4556
    """Return cluster config.
4557

4558
    """
4559
    cluster = self.cfg.GetClusterInfo()
4560
    os_hvp = {}
4561

    
4562
    # Filter just for enabled hypervisors
4563
    for os_name, hv_dict in cluster.os_hvp.items():
4564
      os_hvp[os_name] = {}
4565
      for hv_name, hv_params in hv_dict.items():
4566
        if hv_name in cluster.enabled_hypervisors:
4567
          os_hvp[os_name][hv_name] = hv_params
4568

    
4569
    # Convert ip_family to ip_version
4570
    primary_ip_version = constants.IP4_VERSION
4571
    if cluster.primary_ip_family == netutils.IP6Address.family:
4572
      primary_ip_version = constants.IP6_VERSION
4573

    
4574
    result = {
4575
      "software_version": constants.RELEASE_VERSION,
4576
      "protocol_version": constants.PROTOCOL_VERSION,
4577
      "config_version": constants.CONFIG_VERSION,
4578
      "os_api_version": max(constants.OS_API_VERSIONS),
4579
      "export_version": constants.EXPORT_VERSION,
4580
      "architecture": (platform.architecture()[0], platform.machine()),
4581
      "name": cluster.cluster_name,
4582
      "master": cluster.master_node,
4583
      "default_hypervisor": cluster.enabled_hypervisors[0],
4584
      "enabled_hypervisors": cluster.enabled_hypervisors,
4585
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4586
                        for hypervisor_name in cluster.enabled_hypervisors]),
4587
      "os_hvp": os_hvp,
4588
      "beparams": cluster.beparams,
4589
      "osparams": cluster.osparams,
4590
      "nicparams": cluster.nicparams,
4591
      "ndparams": cluster.ndparams,
4592
      "candidate_pool_size": cluster.candidate_pool_size,
4593
      "master_netdev": cluster.master_netdev,
4594
      "volume_group_name": cluster.volume_group_name,
4595
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4596
      "file_storage_dir": cluster.file_storage_dir,
4597
      "maintain_node_health": cluster.maintain_node_health,
4598
      "ctime": cluster.ctime,
4599
      "mtime": cluster.mtime,
4600
      "uuid": cluster.uuid,
4601
      "tags": list(cluster.GetTags()),
4602
      "uid_pool": cluster.uid_pool,
4603
      "default_iallocator": cluster.default_iallocator,
4604
      "reserved_lvs": cluster.reserved_lvs,
4605
      "primary_ip_version": primary_ip_version,
4606
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4607
      "hidden_os": cluster.hidden_os,
4608
      "blacklisted_os": cluster.blacklisted_os,
4609
      }
4610

    
4611
    return result
4612

    
4613

    
4614
class LUClusterConfigQuery(NoHooksLU):
4615
  """Return configuration values.
4616

4617
  """
4618
  REQ_BGL = False
4619
  _FIELDS_DYNAMIC = utils.FieldSet()
4620
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4621
                                  "watcher_pause", "volume_group_name")
4622

    
4623
  def CheckArguments(self):
4624
    _CheckOutputFields(static=self._FIELDS_STATIC,
4625
                       dynamic=self._FIELDS_DYNAMIC,
4626
                       selected=self.op.output_fields)
4627

    
4628
  def ExpandNames(self):
4629
    self.needed_locks = {}
4630

    
4631
  def Exec(self, feedback_fn):
4632
    """Dump a representation of the cluster config to the standard output.
4633

4634
    """
4635
    values = []
4636
    for field in self.op.output_fields:
4637
      if field == "cluster_name":
4638
        entry = self.cfg.GetClusterName()
4639
      elif field == "master_node":
4640
        entry = self.cfg.GetMasterNode()
4641
      elif field == "drain_flag":
4642
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4643
      elif field == "watcher_pause":
4644
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4645
      elif field == "volume_group_name":
4646
        entry = self.cfg.GetVGName()
4647
      else:
4648
        raise errors.ParameterError(field)
4649
      values.append(entry)
4650
    return values
4651

    
4652

    
4653
class LUInstanceActivateDisks(NoHooksLU):
4654
  """Bring up an instance's disks.
4655

4656
  """
4657
  REQ_BGL = False
4658

    
4659
  def ExpandNames(self):
4660
    self._ExpandAndLockInstance()
4661
    self.needed_locks[locking.LEVEL_NODE] = []
4662
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4663

    
4664
  def DeclareLocks(self, level):
4665
    if level == locking.LEVEL_NODE:
4666
      self._LockInstancesNodes()
4667

    
4668
  def CheckPrereq(self):
4669
    """Check prerequisites.
4670

4671
    This checks that the instance is in the cluster.
4672

4673
    """
4674
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4675
    assert self.instance is not None, \
4676
      "Cannot retrieve locked instance %s" % self.op.instance_name
4677
    _CheckNodeOnline(self, self.instance.primary_node)
4678

    
4679
  def Exec(self, feedback_fn):
4680
    """Activate the disks.
4681

4682
    """
4683
    disks_ok, disks_info = \
4684
              _AssembleInstanceDisks(self, self.instance,
4685
                                     ignore_size=self.op.ignore_size)
4686
    if not disks_ok:
4687
      raise errors.OpExecError("Cannot activate block devices")
4688

    
4689
    return disks_info
4690

    
4691

    
4692
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4693
                           ignore_size=False):
4694
  """Prepare the block devices for an instance.
4695

4696
  This sets up the block devices on all nodes.
4697

4698
  @type lu: L{LogicalUnit}
4699
  @param lu: the logical unit on whose behalf we execute
4700
  @type instance: L{objects.Instance}
4701
  @param instance: the instance for whose disks we assemble
4702
  @type disks: list of L{objects.Disk} or None
4703
  @param disks: which disks to assemble (or all, if None)
4704
  @type ignore_secondaries: boolean
4705
  @param ignore_secondaries: if true, errors on secondary nodes
4706
      won't result in an error return from the function
4707
  @type ignore_size: boolean
4708
  @param ignore_size: if true, the current known size of the disk
4709
      will not be used during the disk activation, useful for cases
4710
      when the size is wrong
4711
  @return: False if the operation failed, otherwise a list of
4712
      (host, instance_visible_name, node_visible_name)
4713
      with the mapping from node devices to instance devices
4714

4715
  """
4716
  device_info = []
4717
  disks_ok = True
4718
  iname = instance.name
4719
  disks = _ExpandCheckDisks(instance, disks)
4720

    
4721
  # With the two passes mechanism we try to reduce the window of
4722
  # opportunity for the race condition of switching DRBD to primary
4723
  # before handshaking occured, but we do not eliminate it
4724

    
4725
  # The proper fix would be to wait (with some limits) until the
4726
  # connection has been made and drbd transitions from WFConnection
4727
  # into any other network-connected state (Connected, SyncTarget,
4728
  # SyncSource, etc.)
4729

    
4730
  # 1st pass, assemble on all nodes in secondary mode
4731
  for inst_disk in disks:
4732
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4733
      if ignore_size:
4734
        node_disk = node_disk.Copy()
4735
        node_disk.UnsetSize()
4736
      lu.cfg.SetDiskID(node_disk, node)
4737
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4738
      msg = result.fail_msg
4739
      if msg:
4740
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4741
                           " (is_primary=False, pass=1): %s",
4742
                           inst_disk.iv_name, node, msg)
4743
        if not ignore_secondaries:
4744
          disks_ok = False
4745

    
4746
  # FIXME: race condition on drbd migration to primary
4747

    
4748
  # 2nd pass, do only the primary node
4749
  for inst_disk in disks:
4750
    dev_path = None
4751

    
4752
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4753
      if node != instance.primary_node:
4754
        continue
4755
      if ignore_size:
4756
        node_disk = node_disk.Copy()
4757
        node_disk.UnsetSize()
4758
      lu.cfg.SetDiskID(node_disk, node)
4759
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4760
      msg = result.fail_msg
4761
      if msg:
4762
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4763
                           " (is_primary=True, pass=2): %s",
4764
                           inst_disk.iv_name, node, msg)
4765
        disks_ok = False
4766
      else:
4767
        dev_path = result.payload
4768

    
4769
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4770

    
4771
  # leave the disks configured for the primary node
4772
  # this is a workaround that would be fixed better by
4773
  # improving the logical/physical id handling
4774
  for disk in disks:
4775
    lu.cfg.SetDiskID(disk, instance.primary_node)
4776

    
4777
  return disks_ok, device_info
4778

    
4779

    
4780
def _StartInstanceDisks(lu, instance, force):
4781
  """Start the disks of an instance.
4782

4783
  """
4784
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4785
                                           ignore_secondaries=force)
4786
  if not disks_ok:
4787
    _ShutdownInstanceDisks(lu, instance)
4788
    if force is not None and not force:
4789
      lu.proc.LogWarning("", hint="If the message above refers to a"
4790
                         " secondary node,"
4791
                         " you can retry the operation using '--force'.")
4792
    raise errors.OpExecError("Disk consistency error")
4793

    
4794

    
4795
class LUInstanceDeactivateDisks(NoHooksLU):
4796
  """Shutdown an instance's disks.
4797

4798
  """
4799
  REQ_BGL = False
4800

    
4801
  def ExpandNames(self):
4802
    self._ExpandAndLockInstance()
4803
    self.needed_locks[locking.LEVEL_NODE] = []
4804
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4805

    
4806
  def DeclareLocks(self, level):
4807
    if level == locking.LEVEL_NODE:
4808
      self._LockInstancesNodes()
4809

    
4810
  def CheckPrereq(self):
4811
    """Check prerequisites.
4812

4813
    This checks that the instance is in the cluster.
4814

4815
    """
4816
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4817
    assert self.instance is not None, \
4818
      "Cannot retrieve locked instance %s" % self.op.instance_name
4819

    
4820
  def Exec(self, feedback_fn):
4821
    """Deactivate the disks
4822

4823
    """
4824
    instance = self.instance
4825
    if self.op.force:
4826
      _ShutdownInstanceDisks(self, instance)
4827
    else:
4828
      _SafeShutdownInstanceDisks(self, instance)
4829

    
4830

    
4831
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4832
  """Shutdown block devices of an instance.
4833

4834
  This function checks if an instance is running, before calling
4835
  _ShutdownInstanceDisks.
4836

4837
  """
4838
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4839
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4840

    
4841

    
4842
def _ExpandCheckDisks(instance, disks):
4843
  """Return the instance disks selected by the disks list
4844

4845
  @type disks: list of L{objects.Disk} or None
4846
  @param disks: selected disks
4847
  @rtype: list of L{objects.Disk}
4848
  @return: selected instance disks to act on
4849

4850
  """
4851
  if disks is None:
4852
    return instance.disks
4853
  else:
4854
    if not set(disks).issubset(instance.disks):
4855
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4856
                                   " target instance")
4857
    return disks
4858

    
4859

    
4860
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4861
  """Shutdown block devices of an instance.
4862

4863
  This does the shutdown on all nodes of the instance.
4864

4865
  If the ignore_primary is false, errors on the primary node are
4866
  ignored.
4867

4868
  """
4869
  all_result = True
4870
  disks = _ExpandCheckDisks(instance, disks)
4871

    
4872
  for disk in disks:
4873
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4874
      lu.cfg.SetDiskID(top_disk, node)
4875
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4876
      msg = result.fail_msg
4877
      if msg:
4878
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4879
                      disk.iv_name, node, msg)
4880
        if ((node == instance.primary_node and not ignore_primary) or
4881
            (node != instance.primary_node and not result.offline)):
4882
          all_result = False
4883
  return all_result
4884

    
4885

    
4886
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4887
  """Checks if a node has enough free memory.
4888

4889
  This function check if a given node has the needed amount of free
4890
  memory. In case the node has less memory or we cannot get the
4891
  information from the node, this function raise an OpPrereqError
4892
  exception.
4893

4894
  @type lu: C{LogicalUnit}
4895
  @param lu: a logical unit from which we get configuration data
4896
  @type node: C{str}
4897
  @param node: the node to check
4898
  @type reason: C{str}
4899
  @param reason: string to use in the error message
4900
  @type requested: C{int}
4901
  @param requested: the amount of memory in MiB to check for
4902
  @type hypervisor_name: C{str}
4903
  @param hypervisor_name: the hypervisor to ask for memory stats
4904
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4905
      we cannot check the node
4906

4907
  """
4908
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4909
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4910
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4911
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4912
  if not isinstance(free_mem, int):
4913
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4914
                               " was '%s'" % (node, free_mem),
4915
                               errors.ECODE_ENVIRON)
4916
  if requested > free_mem:
4917
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4918
                               " needed %s MiB, available %s MiB" %
4919
                               (node, reason, requested, free_mem),
4920
                               errors.ECODE_NORES)
4921

    
4922

    
4923
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4924
  """Checks if nodes have enough free disk space in the all VGs.
4925

4926
  This function check if all given nodes have the needed amount of
4927
  free disk. In case any node has less disk or we cannot get the
4928
  information from the node, this function raise an OpPrereqError
4929
  exception.
4930

4931
  @type lu: C{LogicalUnit}
4932
  @param lu: a logical unit from which we get configuration data
4933
  @type nodenames: C{list}
4934
  @param nodenames: the list of node names to check
4935
  @type req_sizes: C{dict}
4936
  @param req_sizes: the hash of vg and corresponding amount of disk in
4937
      MiB to check for
4938
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4939
      or we cannot check the node
4940

4941
  """
4942
  for vg, req_size in req_sizes.items():
4943
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
4944

    
4945

    
4946
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4947
  """Checks if nodes have enough free disk space in the specified VG.
4948

4949
  This function check if all given nodes have the needed amount of
4950
  free disk. In case any node has less disk or we cannot get the
4951
  information from the node, this function raise an OpPrereqError
4952
  exception.
4953

4954
  @type lu: C{LogicalUnit}
4955
  @param lu: a logical unit from which we get configuration data
4956
  @type nodenames: C{list}
4957
  @param nodenames: the list of node names to check
4958
  @type vg: C{str}
4959
  @param vg: the volume group to check
4960
  @type requested: C{int}
4961
  @param requested: the amount of disk in MiB to check for
4962
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4963
      or we cannot check the node
4964

4965
  """
4966
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
4967
  for node in nodenames:
4968
    info = nodeinfo[node]
4969
    info.Raise("Cannot get current information from node %s" % node,
4970
               prereq=True, ecode=errors.ECODE_ENVIRON)
4971
    vg_free = info.payload.get("vg_free", None)
4972
    if not isinstance(vg_free, int):
4973
      raise errors.OpPrereqError("Can't compute free disk space on node"
4974
                                 " %s for vg %s, result was '%s'" %
4975
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
4976
    if requested > vg_free:
4977
      raise errors.OpPrereqError("Not enough disk space on target node %s"
4978
                                 " vg %s: required %d MiB, available %d MiB" %
4979
                                 (node, vg, requested, vg_free),
4980
                                 errors.ECODE_NORES)
4981

    
4982

    
4983
class LUInstanceStartup(LogicalUnit):
4984
  """Starts an instance.
4985

4986
  """
4987
  HPATH = "instance-start"
4988
  HTYPE = constants.HTYPE_INSTANCE
4989
  REQ_BGL = False
4990

    
4991
  def CheckArguments(self):
4992
    # extra beparams
4993
    if self.op.beparams:
4994
      # fill the beparams dict
4995
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4996

    
4997
  def ExpandNames(self):
4998
    self._ExpandAndLockInstance()
4999

    
5000
  def BuildHooksEnv(self):
5001
    """Build hooks env.
5002

5003
    This runs on master, primary and secondary nodes of the instance.
5004

5005
    """
5006
    env = {
5007
      "FORCE": self.op.force,
5008
      }
5009
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5010
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5011
    return env, nl, nl
5012

    
5013
  def CheckPrereq(self):
5014
    """Check prerequisites.
5015

5016
    This checks that the instance is in the cluster.
5017

5018
    """
5019
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5020
    assert self.instance is not None, \
5021
      "Cannot retrieve locked instance %s" % self.op.instance_name
5022

    
5023
    # extra hvparams
5024
    if self.op.hvparams:
5025
      # check hypervisor parameter syntax (locally)
5026
      cluster = self.cfg.GetClusterInfo()
5027
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5028
      filled_hvp = cluster.FillHV(instance)
5029
      filled_hvp.update(self.op.hvparams)
5030
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5031
      hv_type.CheckParameterSyntax(filled_hvp)
5032
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5033

    
5034
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5035

    
5036
    if self.primary_offline and self.op.ignore_offline_nodes:
5037
      self.proc.LogWarning("Ignoring offline primary node")
5038

    
5039
      if self.op.hvparams or self.op.beparams:
5040
        self.proc.LogWarning("Overridden parameters are ignored")
5041
    else:
5042
      _CheckNodeOnline(self, instance.primary_node)
5043

    
5044
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5045

    
5046
      # check bridges existence
5047
      _CheckInstanceBridgesExist(self, instance)
5048

    
5049
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5050
                                                instance.name,
5051
                                                instance.hypervisor)
5052
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5053
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5054
      if not remote_info.payload: # not running already
5055
        _CheckNodeFreeMemory(self, instance.primary_node,
5056
                             "starting instance %s" % instance.name,
5057
                             bep[constants.BE_MEMORY], instance.hypervisor)
5058

    
5059
  def Exec(self, feedback_fn):
5060
    """Start the instance.
5061

5062
    """
5063
    instance = self.instance
5064
    force = self.op.force
5065

    
5066
    self.cfg.MarkInstanceUp(instance.name)
5067

    
5068
    if self.primary_offline:
5069
      assert self.op.ignore_offline_nodes
5070
      self.proc.LogInfo("Primary node offline, marked instance as started")
5071
    else:
5072
      node_current = instance.primary_node
5073

    
5074
      _StartInstanceDisks(self, instance, force)
5075

    
5076
      result = self.rpc.call_instance_start(node_current, instance,
5077
                                            self.op.hvparams, self.op.beparams)
5078
      msg = result.fail_msg
5079
      if msg:
5080
        _ShutdownInstanceDisks(self, instance)
5081
        raise errors.OpExecError("Could not start instance: %s" % msg)
5082

    
5083

    
5084
class LUInstanceReboot(LogicalUnit):
5085
  """Reboot an instance.
5086

5087
  """
5088
  HPATH = "instance-reboot"
5089
  HTYPE = constants.HTYPE_INSTANCE
5090
  REQ_BGL = False
5091

    
5092
  def ExpandNames(self):
5093
    self._ExpandAndLockInstance()
5094

    
5095
  def BuildHooksEnv(self):
5096
    """Build hooks env.
5097

5098
    This runs on master, primary and secondary nodes of the instance.
5099

5100
    """
5101
    env = {
5102
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5103
      "REBOOT_TYPE": self.op.reboot_type,
5104
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5105
      }
5106
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5107
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5108
    return env, nl, nl
5109

    
5110
  def CheckPrereq(self):
5111
    """Check prerequisites.
5112

5113
    This checks that the instance is in the cluster.
5114

5115
    """
5116
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5117
    assert self.instance is not None, \
5118
      "Cannot retrieve locked instance %s" % self.op.instance_name
5119

    
5120
    _CheckNodeOnline(self, instance.primary_node)
5121

    
5122
    # check bridges existence
5123
    _CheckInstanceBridgesExist(self, instance)
5124

    
5125
  def Exec(self, feedback_fn):
5126
    """Reboot the instance.
5127

5128
    """
5129
    instance = self.instance
5130
    ignore_secondaries = self.op.ignore_secondaries
5131
    reboot_type = self.op.reboot_type
5132

    
5133
    node_current = instance.primary_node
5134

    
5135
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5136
                       constants.INSTANCE_REBOOT_HARD]:
5137
      for disk in instance.disks:
5138
        self.cfg.SetDiskID(disk, node_current)
5139
      result = self.rpc.call_instance_reboot(node_current, instance,
5140
                                             reboot_type,
5141
                                             self.op.shutdown_timeout)
5142
      result.Raise("Could not reboot instance")
5143
    else:
5144
      result = self.rpc.call_instance_shutdown(node_current, instance,
5145
                                               self.op.shutdown_timeout)
5146
      result.Raise("Could not shutdown instance for full reboot")
5147
      _ShutdownInstanceDisks(self, instance)
5148
      _StartInstanceDisks(self, instance, ignore_secondaries)
5149
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5150
      msg = result.fail_msg
5151
      if msg:
5152
        _ShutdownInstanceDisks(self, instance)
5153
        raise errors.OpExecError("Could not start instance for"
5154
                                 " full reboot: %s" % msg)
5155

    
5156
    self.cfg.MarkInstanceUp(instance.name)
5157

    
5158

    
5159
class LUInstanceShutdown(LogicalUnit):
5160
  """Shutdown an instance.
5161

5162
  """
5163
  HPATH = "instance-stop"
5164
  HTYPE = constants.HTYPE_INSTANCE
5165
  REQ_BGL = False
5166

    
5167
  def ExpandNames(self):
5168
    self._ExpandAndLockInstance()
5169

    
5170
  def BuildHooksEnv(self):
5171
    """Build hooks env.
5172

5173
    This runs on master, primary and secondary nodes of the instance.
5174

5175
    """
5176
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5177
    env["TIMEOUT"] = self.op.timeout
5178
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5179
    return env, nl, nl
5180

    
5181
  def CheckPrereq(self):
5182
    """Check prerequisites.
5183

5184
    This checks that the instance is in the cluster.
5185

5186
    """
5187
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5188
    assert self.instance is not None, \
5189
      "Cannot retrieve locked instance %s" % self.op.instance_name
5190

    
5191
    self.primary_offline = \
5192
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5193

    
5194
    if self.primary_offline and self.op.ignore_offline_nodes:
5195
      self.proc.LogWarning("Ignoring offline primary node")
5196
    else:
5197
      _CheckNodeOnline(self, self.instance.primary_node)
5198

    
5199
  def Exec(self, feedback_fn):
5200
    """Shutdown the instance.
5201

5202
    """
5203
    instance = self.instance
5204
    node_current = instance.primary_node
5205
    timeout = self.op.timeout
5206

    
5207
    self.cfg.MarkInstanceDown(instance.name)
5208

    
5209
    if self.primary_offline:
5210
      assert self.op.ignore_offline_nodes
5211
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5212
    else:
5213
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5214
      msg = result.fail_msg
5215
      if msg:
5216
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5217

    
5218
      _ShutdownInstanceDisks(self, instance)
5219

    
5220

    
5221
class LUInstanceReinstall(LogicalUnit):
5222
  """Reinstall an instance.
5223

5224
  """
5225
  HPATH = "instance-reinstall"
5226
  HTYPE = constants.HTYPE_INSTANCE
5227
  REQ_BGL = False
5228

    
5229
  def ExpandNames(self):
5230
    self._ExpandAndLockInstance()
5231

    
5232
  def BuildHooksEnv(self):
5233
    """Build hooks env.
5234

5235
    This runs on master, primary and secondary nodes of the instance.
5236

5237
    """
5238
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5239
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5240
    return env, nl, nl
5241

    
5242
  def CheckPrereq(self):
5243
    """Check prerequisites.
5244

5245
    This checks that the instance is in the cluster and is not running.
5246

5247
    """
5248
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5249
    assert instance is not None, \
5250
      "Cannot retrieve locked instance %s" % self.op.instance_name
5251
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5252
                     " offline, cannot reinstall")
5253
    for node in instance.secondary_nodes:
5254
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5255
                       " cannot reinstall")
5256

    
5257
    if instance.disk_template == constants.DT_DISKLESS:
5258
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5259
                                 self.op.instance_name,
5260
                                 errors.ECODE_INVAL)
5261
    _CheckInstanceDown(self, instance, "cannot reinstall")
5262

    
5263
    if self.op.os_type is not None:
5264
      # OS verification
5265
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5266
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5267
      instance_os = self.op.os_type
5268
    else:
5269
      instance_os = instance.os
5270

    
5271
    nodelist = list(instance.all_nodes)
5272

    
5273
    if self.op.osparams:
5274
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5275
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5276
      self.os_inst = i_osdict # the new dict (without defaults)
5277
    else:
5278
      self.os_inst = None
5279

    
5280
    self.instance = instance
5281

    
5282
  def Exec(self, feedback_fn):
5283
    """Reinstall the instance.
5284

5285
    """
5286
    inst = self.instance
5287

    
5288
    if self.op.os_type is not None:
5289
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5290
      inst.os = self.op.os_type
5291
      # Write to configuration
5292
      self.cfg.Update(inst, feedback_fn)
5293

    
5294
    _StartInstanceDisks(self, inst, None)
5295
    try:
5296
      feedback_fn("Running the instance OS create scripts...")
5297
      # FIXME: pass debug option from opcode to backend
5298
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5299
                                             self.op.debug_level,
5300
                                             osparams=self.os_inst)
5301
      result.Raise("Could not install OS for instance %s on node %s" %
5302
                   (inst.name, inst.primary_node))
5303
    finally:
5304
      _ShutdownInstanceDisks(self, inst)
5305

    
5306

    
5307
class LUInstanceRecreateDisks(LogicalUnit):
5308
  """Recreate an instance's missing disks.
5309

5310
  """
5311
  HPATH = "instance-recreate-disks"
5312
  HTYPE = constants.HTYPE_INSTANCE
5313
  REQ_BGL = False
5314

    
5315
  def ExpandNames(self):
5316
    self._ExpandAndLockInstance()
5317

    
5318
  def BuildHooksEnv(self):
5319
    """Build hooks env.
5320

5321
    This runs on master, primary and secondary nodes of the instance.
5322

5323
    """
5324
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5325
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5326
    return env, nl, nl
5327

    
5328
  def CheckPrereq(self):
5329
    """Check prerequisites.
5330

5331
    This checks that the instance is in the cluster and is not running.
5332

5333
    """
5334
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5335
    assert instance is not None, \
5336
      "Cannot retrieve locked instance %s" % self.op.instance_name
5337
    _CheckNodeOnline(self, instance.primary_node)
5338

    
5339
    if instance.disk_template == constants.DT_DISKLESS:
5340
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5341
                                 self.op.instance_name, errors.ECODE_INVAL)
5342
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5343

    
5344
    if not self.op.disks:
5345
      self.op.disks = range(len(instance.disks))
5346
    else:
5347
      for idx in self.op.disks:
5348
        if idx >= len(instance.disks):
5349
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5350
                                     errors.ECODE_INVAL)
5351

    
5352
    self.instance = instance
5353

    
5354
  def Exec(self, feedback_fn):
5355
    """Recreate the disks.
5356

5357
    """
5358
    to_skip = []
5359
    for idx, _ in enumerate(self.instance.disks):
5360
      if idx not in self.op.disks: # disk idx has not been passed in
5361
        to_skip.append(idx)
5362
        continue
5363

    
5364
    _CreateDisks(self, self.instance, to_skip=to_skip)
5365

    
5366

    
5367
class LUInstanceRename(LogicalUnit):
5368
  """Rename an instance.
5369

5370
  """
5371
  HPATH = "instance-rename"
5372
  HTYPE = constants.HTYPE_INSTANCE
5373

    
5374
  def CheckArguments(self):
5375
    """Check arguments.
5376

5377
    """
5378
    if self.op.ip_check and not self.op.name_check:
5379
      # TODO: make the ip check more flexible and not depend on the name check
5380
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5381
                                 errors.ECODE_INVAL)
5382

    
5383
  def BuildHooksEnv(self):
5384
    """Build hooks env.
5385

5386
    This runs on master, primary and secondary nodes of the instance.
5387

5388
    """
5389
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5390
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5391
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5392
    return env, nl, nl
5393

    
5394
  def CheckPrereq(self):
5395
    """Check prerequisites.
5396

5397
    This checks that the instance is in the cluster and is not running.
5398

5399
    """
5400
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5401
                                                self.op.instance_name)
5402
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5403
    assert instance is not None
5404
    _CheckNodeOnline(self, instance.primary_node)
5405
    _CheckInstanceDown(self, instance, "cannot rename")
5406
    self.instance = instance
5407

    
5408
    new_name = self.op.new_name
5409
    if self.op.name_check:
5410
      hostname = netutils.GetHostname(name=new_name)
5411
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5412
                   hostname.name)
5413
      new_name = self.op.new_name = hostname.name
5414
      if (self.op.ip_check and
5415
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5416
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5417
                                   (hostname.ip, new_name),
5418
                                   errors.ECODE_NOTUNIQUE)
5419

    
5420
    instance_list = self.cfg.GetInstanceList()
5421
    if new_name in instance_list and new_name != instance.name:
5422
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5423
                                 new_name, errors.ECODE_EXISTS)
5424

    
5425
  def Exec(self, feedback_fn):
5426
    """Rename the instance.
5427

5428
    """
5429
    inst = self.instance
5430
    old_name = inst.name
5431

    
5432
    rename_file_storage = False
5433
    if (inst.disk_template == constants.DT_FILE and
5434
        self.op.new_name != inst.name):
5435
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5436
      rename_file_storage = True
5437

    
5438
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5439
    # Change the instance lock. This is definitely safe while we hold the BGL
5440
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5441
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5442

    
5443
    # re-read the instance from the configuration after rename
5444
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5445

    
5446
    if rename_file_storage:
5447
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5448
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5449
                                                     old_file_storage_dir,
5450
                                                     new_file_storage_dir)
5451
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5452
                   " (but the instance has been renamed in Ganeti)" %
5453
                   (inst.primary_node, old_file_storage_dir,
5454
                    new_file_storage_dir))
5455

    
5456
    _StartInstanceDisks(self, inst, None)
5457
    try:
5458
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5459
                                                 old_name, self.op.debug_level)
5460
      msg = result.fail_msg
5461
      if msg:
5462
        msg = ("Could not run OS rename script for instance %s on node %s"
5463
               " (but the instance has been renamed in Ganeti): %s" %
5464
               (inst.name, inst.primary_node, msg))
5465
        self.proc.LogWarning(msg)
5466
    finally:
5467
      _ShutdownInstanceDisks(self, inst)
5468

    
5469
    return inst.name
5470

    
5471

    
5472
class LUInstanceRemove(LogicalUnit):
5473
  """Remove an instance.
5474

5475
  """
5476
  HPATH = "instance-remove"
5477
  HTYPE = constants.HTYPE_INSTANCE
5478
  REQ_BGL = False
5479

    
5480
  def ExpandNames(self):
5481
    self._ExpandAndLockInstance()
5482
    self.needed_locks[locking.LEVEL_NODE] = []
5483
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5484

    
5485
  def DeclareLocks(self, level):
5486
    if level == locking.LEVEL_NODE:
5487
      self._LockInstancesNodes()
5488

    
5489
  def BuildHooksEnv(self):
5490
    """Build hooks env.
5491

5492
    This runs on master, primary and secondary nodes of the instance.
5493

5494
    """
5495
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5496
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5497
    nl = [self.cfg.GetMasterNode()]
5498
    nl_post = list(self.instance.all_nodes) + nl
5499
    return env, nl, nl_post
5500

    
5501
  def CheckPrereq(self):
5502
    """Check prerequisites.
5503

5504
    This checks that the instance is in the cluster.
5505

5506
    """
5507
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5508
    assert self.instance is not None, \
5509
      "Cannot retrieve locked instance %s" % self.op.instance_name
5510

    
5511
  def Exec(self, feedback_fn):
5512
    """Remove the instance.
5513

5514
    """
5515
    instance = self.instance
5516
    logging.info("Shutting down instance %s on node %s",
5517
                 instance.name, instance.primary_node)
5518

    
5519
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5520
                                             self.op.shutdown_timeout)
5521
    msg = result.fail_msg
5522
    if msg:
5523
      if self.op.ignore_failures:
5524
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5525
      else:
5526
        raise errors.OpExecError("Could not shutdown instance %s on"
5527
                                 " node %s: %s" %
5528
                                 (instance.name, instance.primary_node, msg))
5529

    
5530
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5531

    
5532

    
5533
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5534
  """Utility function to remove an instance.
5535

5536
  """
5537
  logging.info("Removing block devices for instance %s", instance.name)
5538

    
5539
  if not _RemoveDisks(lu, instance):
5540
    if not ignore_failures:
5541
      raise errors.OpExecError("Can't remove instance's disks")
5542
    feedback_fn("Warning: can't remove instance's disks")
5543

    
5544
  logging.info("Removing instance %s out of cluster config", instance.name)
5545

    
5546
  lu.cfg.RemoveInstance(instance.name)
5547

    
5548
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5549
    "Instance lock removal conflict"
5550

    
5551
  # Remove lock for the instance
5552
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5553

    
5554

    
5555
class LUInstanceQuery(NoHooksLU):
5556
  """Logical unit for querying instances.
5557

5558
  """
5559
  # pylint: disable-msg=W0142
5560
  REQ_BGL = False
5561

    
5562
  def CheckArguments(self):
5563
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5564
                             self.op.use_locking)
5565

    
5566
  def ExpandNames(self):
5567
    self.iq.ExpandNames(self)
5568

    
5569
  def DeclareLocks(self, level):
5570
    self.iq.DeclareLocks(self, level)
5571

    
5572
  def Exec(self, feedback_fn):
5573
    return self.iq.OldStyleQuery(self)
5574

    
5575

    
5576
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency or primary_node.offline:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self._migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters are accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


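# Note on _GenerateUniqueNames above (illustrative example, not part of the
# call flow): for exts == [".disk0", ".disk1"] it returns something like
# ["<unique-id>.disk0", "<unique-id>.disk1"], where each <unique-id> is a
# fresh ID obtained from the configuration for the current execution context.
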
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


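# Note on _GenerateDRBD8Branch above: the returned object is a small device
# tree -- an LD_DRBD8 disk of the requested size whose children are a data LV
# (names[0], full size) and a fixed 128 MB metadata LV (names[1]); the DRBD
# logical_id bundles both nodes, the allocated port, the two minors and the
# shared secret.
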
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], vg, names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


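# Note on _CalcEta above (illustrative arithmetic): if 512 MiB of a 2048 MiB
# device were written in 60 seconds, the average is 60 / 512.0 seconds per
# MiB, so the estimate is (2048 - 512) * (60 / 512.0) = 180.0 seconds left.
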
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node
  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s", idx, instance.name)

      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
                      " look at the status and troubleshoot the issue.", idx)
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)


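# Note on _WipeDisks above (illustrative, assuming MAX_WIPE_CHUNK and
# MIN_WIPE_CHUNK_PERCENT are e.g. 1024 MiB and 10): a 4096 MiB disk would be
# wiped in chunks of min(1024, 4096 / 100.0 * 10) = 409.6 MiB, while a very
# large disk would be capped at 1024 MiB per call_blockdev_wipe request.
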
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm

    """
    vgs = {}
    for disk in disks:
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


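# Note on _ComputeDiskSizePerVG above (illustrative): for two DRBD8 disks of
# 1024 MiB each in volume group "xenvg", the per-VG requirement comes out as
# {"xenvg": (1024 + 128) + (1024 + 128)} == {"xenvg": 2304}, i.e. the disk
# sizes plus 128 MB of DRBD metadata per disk, accumulated per volume group.
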
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


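# Note on _ComputeDiskSize above (illustrative): for disks of 512 and 1024 MiB
# the plain template needs 512 + 1024 = 1536 MiB, while drbd8 needs
# (512 + 128) + (1024 + 128) = 1792 MiB; the diskless and file templates
# report no volume group requirement (None).
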
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
6811
  """Create an instance.
6812

6813
  """
6814
  HPATH = "instance-add"
6815
  HTYPE = constants.HTYPE_INSTANCE
6816
  REQ_BGL = False
6817

    
6818
  def CheckArguments(self):
6819
    """Check arguments.
6820

6821
    """
6822
    # do not require name_check to ease forward/backward compatibility
6823
    # for tools
6824
    if self.op.no_install and self.op.start:
6825
      self.LogInfo("No-installation mode selected, disabling startup")
6826
      self.op.start = False
6827
    # validate/normalize the instance name
6828
    self.op.instance_name = \
6829
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6830

    
6831
    if self.op.ip_check and not self.op.name_check:
6832
      # TODO: make the ip check more flexible and not depend on the name check
6833
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6834
                                 errors.ECODE_INVAL)
6835

    
6836
    # check nics' parameter names
6837
    for nic in self.op.nics:
6838
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6839

    
6840
    # check disks. parameter names and consistent adopt/no-adopt strategy
6841
    has_adopt = has_no_adopt = False
6842
    for disk in self.op.disks:
6843
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6844
      if "adopt" in disk:
6845
        has_adopt = True
6846
      else:
6847
        has_no_adopt = True
6848
    if has_adopt and has_no_adopt:
6849
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6850
                                 errors.ECODE_INVAL)
6851
    if has_adopt:
6852
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6853
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6854
                                   " '%s' disk template" %
6855
                                   self.op.disk_template,
6856
                                   errors.ECODE_INVAL)
6857
      if self.op.iallocator is not None:
6858
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6859
                                   " iallocator script", errors.ECODE_INVAL)
6860
      if self.op.mode == constants.INSTANCE_IMPORT:
6861
        raise errors.OpPrereqError("Disk adoption not allowed for"
6862
                                   " instance import", errors.ECODE_INVAL)
6863

    
6864
    self.adopt_disks = has_adopt
6865

    
6866
    # instance name verification
6867
    if self.op.name_check:
6868
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6869
      self.op.instance_name = self.hostname1.name
6870
      # used in CheckPrereq for ip ping check
6871
      self.check_ip = self.hostname1.ip
6872
    else:
6873
      self.check_ip = None
6874

    
6875
    # file storage checks
6876
    if (self.op.file_driver and
6877
        not self.op.file_driver in constants.FILE_DRIVER):
6878
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6879
                                 self.op.file_driver, errors.ECODE_INVAL)
6880

    
6881
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6882
      raise errors.OpPrereqError("File storage directory path not absolute",
6883
                                 errors.ECODE_INVAL)
6884

    
6885
    ### Node/iallocator related checks
6886
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6887

    
6888
    if self.op.pnode is not None:
6889
      if self.op.disk_template in constants.DTS_NET_MIRROR:
6890
        if self.op.snode is None:
6891
          raise errors.OpPrereqError("The networked disk templates need"
6892
                                     " a mirror node", errors.ECODE_INVAL)
6893
      elif self.op.snode:
6894
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6895
                        " template")
6896
        self.op.snode = None
6897

    
6898
    self._cds = _GetClusterDomainSecret()
6899

    
6900
    if self.op.mode == constants.INSTANCE_IMPORT:
6901
      # On import force_variant must be True, because if we forced it at
6902
      # initial install, our only chance when importing it back is that it
6903
      # works again!
6904
      self.op.force_variant = True
6905

    
6906
      if self.op.no_install:
6907
        self.LogInfo("No-installation mode has no effect during import")
6908

    
6909
    elif self.op.mode == constants.INSTANCE_CREATE:
6910
      if self.op.os_type is None:
6911
        raise errors.OpPrereqError("No guest OS specified",
6912
                                   errors.ECODE_INVAL)
6913
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6914
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6915
                                   " installation" % self.op.os_type,
6916
                                   errors.ECODE_STATE)
6917
      if self.op.disk_template is None:
6918
        raise errors.OpPrereqError("No disk template specified",
6919
                                   errors.ECODE_INVAL)
6920

    
6921
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6922
      # Check handshake to ensure both clusters have the same domain secret
6923
      src_handshake = self.op.source_handshake
6924
      if not src_handshake:
6925
        raise errors.OpPrereqError("Missing source handshake",
6926
                                   errors.ECODE_INVAL)
6927

    
6928
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6929
                                                           src_handshake)
6930
      if errmsg:
6931
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6932
                                   errors.ECODE_INVAL)
6933

    
6934
      # Load and check source CA
6935
      self.source_x509_ca_pem = self.op.source_x509_ca
6936
      if not self.source_x509_ca_pem:
6937
        raise errors.OpPrereqError("Missing source X509 CA",
6938
                                   errors.ECODE_INVAL)
6939

    
6940
      try:
6941
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6942
                                                    self._cds)
6943
      except OpenSSL.crypto.Error, err:
6944
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6945
                                   (err, ), errors.ECODE_INVAL)
6946

    
6947
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6948
      if errcode is not None:
6949
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6950
                                   errors.ECODE_INVAL)
6951

    
6952
      self.source_x509_ca = cert
6953

    
6954
      src_instance_name = self.op.source_instance_name
6955
      if not src_instance_name:
6956
        raise errors.OpPrereqError("Missing source instance name",
6957
                                   errors.ECODE_INVAL)
6958

    
6959
      self.source_instance_name = \
6960
          netutils.GetHostname(name=src_instance_name).name
6961

    
6962
    else:
6963
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6964
                                 self.op.mode, errors.ECODE_INVAL)
6965

    
6966
  def ExpandNames(self):
6967
    """ExpandNames for CreateInstance.
6968

6969
    Figure out the right locks for instance creation.
6970

6971
    """
6972
    self.needed_locks = {}
6973

    
6974
    instance_name = self.op.instance_name
6975
    # this is just a preventive check, but someone might still add this
6976
    # instance in the meantime, and creation will fail at lock-add time
6977
    if instance_name in self.cfg.GetInstanceList():
6978
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6979
                                 instance_name, errors.ECODE_EXISTS)
6980

    
6981
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6982

    
6983
    if self.op.iallocator:
6984
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6985
    else:
6986
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6987
      nodelist = [self.op.pnode]
6988
      if self.op.snode is not None:
6989
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6990
        nodelist.append(self.op.snode)
6991
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6992

    
6993
    # in case of import lock the source node too
6994
    if self.op.mode == constants.INSTANCE_IMPORT:
6995
      src_node = self.op.src_node
6996
      src_path = self.op.src_path
6997

    
6998
      if src_path is None:
6999
        self.op.src_path = src_path = self.op.instance_name
      if src_node is None:
7002
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7003
        self.op.src_node = None
7004
        if os.path.isabs(src_path):
7005
          raise errors.OpPrereqError("Importing an instance from an absolute"
7006
                                     " path requires a source node option.",
7007
                                     errors.ECODE_INVAL)
7008
      else:
7009
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7010
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7011
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7012
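        # A relative source path is taken to be relative to the cluster
        # export directory on the source node.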
        if not os.path.isabs(src_path):
7013
          self.op.src_path = src_path = \
7014
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
7017
    """Run the allocator based on input opcode.
7018

7019
    """
7020
    nics = [n.ToDict() for n in self.nics]
7021
    ial = IAllocator(self.cfg, self.rpc,
7022
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7023
                     name=self.op.instance_name,
7024
                     disk_template=self.op.disk_template,
7025
                     tags=[],
7026
                     os=self.op.os_type,
7027
                     vcpus=self.be_full[constants.BE_VCPUS],
7028
                     mem_size=self.be_full[constants.BE_MEMORY],
7029
                     disks=self.disks,
7030
                     nics=nics,
7031
                     hypervisor=self.op.hypervisor,
7032
                     )
    ial.Run(self.op.iallocator)
    if not ial.success:
7037
      raise errors.OpPrereqError("Can't compute nodes using"
7038
                                 " iallocator '%s': %s" %
7039
                                 (self.op.iallocator, ial.info),
7040
                                 errors.ECODE_NORES)
7041
    if len(ial.result) != ial.required_nodes:
7042
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7043
                                 " of nodes (%s), required %s" %
7044
                                 (self.op.iallocator, len(ial.result),
7045
                                  ial.required_nodes), errors.ECODE_FAULT)
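    # ial.result is ordered: the first node becomes the primary and, when two
    # nodes are required (mirrored disk templates), the second the secondary.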
7046
    self.op.pnode = ial.result[0]
7047
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7048
                 self.op.instance_name, self.op.iallocator,
7049
                 utils.CommaJoin(ial.result))
7050
    if ial.required_nodes == 2:
7051
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
7054
    """Build hooks env.
7055

7056
    This runs on master, primary and secondary nodes of the instance.
7057

7058
    """
7059
    env = {
7060
      "ADD_MODE": self.op.mode,
7061
      }
7062
    if self.op.mode == constants.INSTANCE_IMPORT:
7063
      env["SRC_NODE"] = self.op.src_node
7064
      env["SRC_PATH"] = self.op.src_path
7065
      env["SRC_IMAGES"] = self.src_images
7066

    
7067
    env.update(_BuildInstanceHookEnv(
7068
      name=self.op.instance_name,
7069
      primary_node=self.op.pnode,
7070
      secondary_nodes=self.secondaries,
7071
      status=self.op.start,
7072
      os_type=self.op.os_type,
7073
      memory=self.be_full[constants.BE_MEMORY],
7074
      vcpus=self.be_full[constants.BE_VCPUS],
7075
      nics=_NICListToTuple(self, self.nics),
7076
      disk_template=self.op.disk_template,
7077
      disks=[(d["size"], d["mode"]) for d in self.disks],
7078
      bep=self.be_full,
7079
      hvp=self.hv_full,
7080
      hypervisor_name=self.op.hypervisor,
7081
    ))
7082

    
7083
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7084
          self.secondaries)
7085
    return env, nl, nl
7086

    
7087
  def _ReadExportInfo(self):
7088
    """Reads the export information from disk.
7089

7090
    It will override the opcode source node and path with the actual
7091
    information, if these two were not specified before.
7092

7093
    @return: the export information
7094

7095
    """
7096
    assert self.op.mode == constants.INSTANCE_IMPORT
7097

    
7098
    src_node = self.op.src_node
7099
    src_path = self.op.src_path
7100

    
7101
    if src_node is None:
7102
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7103
      exp_list = self.rpc.call_export_list(locked_nodes)
7104
      found = False
7105
      for node in exp_list:
7106
        if exp_list[node].fail_msg:
7107
          continue
7108
        if src_path in exp_list[node].payload:
7109
          found = True
7110
          self.op.src_node = src_node = node
7111
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7112
                                                       src_path)
7113
          break
7114
      if not found:
7115
        raise errors.OpPrereqError("No export found for relative path %s" %
7116
                                    src_path, errors.ECODE_INVAL)
7117

    
7118
    _CheckNodeOnline(self, src_node)
7119
    result = self.rpc.call_export_info(src_node, src_path)
7120
    result.Raise("No export or invalid export found in dir %s" % src_path)
7121

    
7122
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7123
    if not export_info.has_section(constants.INISECT_EXP):
7124
      raise errors.ProgrammerError("Corrupted export config",
7125
                                   errors.ECODE_ENVIRON)
7126

    
7127
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7128
    if int(ei_version) != constants.EXPORT_VERSION:
7129
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7130
                                 (ei_version, constants.EXPORT_VERSION),
7131
                                 errors.ECODE_ENVIRON)
7132
    return export_info
7133

    
7134
  def _ReadExportParams(self, einfo):
7135
    """Use export parameters as defaults.
7136

7137
    In case the opcode doesn't specify (as in override) some instance
7138
    parameters, then try to use them from the export information, if
7139
    that declares them.
7140

7141
    """
7142
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7143

    
7144
    if self.op.disk_template is None:
7145
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7146
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7147
                                          "disk_template")
7148
      else:
7149
        raise errors.OpPrereqError("No disk template specified and the export"
7150
                                   " is missing the disk_template information",
7151
                                   errors.ECODE_INVAL)
7152

    
7153
    if not self.op.disks:
7154
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7155
        disks = []
7156
        # TODO: import the disk iv_name too
7157
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7158
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7159
          disks.append({"size": disk_sz})
7160
        self.op.disks = disks
7161
      else:
7162
        raise errors.OpPrereqError("No disk info specified and the export"
7163
                                   " is missing the disk information",
7164
                                   errors.ECODE_INVAL)
7165

    
7166
    if (not self.op.nics and
7167
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7168
      nics = []
7169
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7170
        ndict = {}
7171
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7172
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7173
          ndict[name] = v
7174
        nics.append(ndict)
7175
      self.op.nics = nics
7176

    
7177
    if (self.op.hypervisor is None and
7178
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7179
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7180
    if einfo.has_section(constants.INISECT_HYP):
7181
      # use the export parameters but do not override the ones
7182
      # specified by the user
7183
      for name, value in einfo.items(constants.INISECT_HYP):
7184
        if name not in self.op.hvparams:
7185
          self.op.hvparams[name] = value
7186

    
7187
    if einfo.has_section(constants.INISECT_BEP):
7188
      # use the parameters, without overriding
7189
      for name, value in einfo.items(constants.INISECT_BEP):
7190
        if name not in self.op.beparams:
7191
          self.op.beparams[name] = value
7192
    else:
7193
      # try to read the parameters old style, from the main section
7194
      for name in constants.BES_PARAMETERS:
7195
        if (name not in self.op.beparams and
7196
            einfo.has_option(constants.INISECT_INS, name)):
7197
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7198

    
7199
    if einfo.has_section(constants.INISECT_OSP):
7200
      # use the parameters, without overriding
7201
      for name, value in einfo.items(constants.INISECT_OSP):
7202
        if name not in self.op.osparams:
7203
          self.op.osparams[name] = value
7204

    
7205
  def _RevertToDefaults(self, cluster):
7206
    """Revert the instance parameters to the default values.
7207

7208
    """
7209
    # hvparams
7210
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7211
    for name in self.op.hvparams.keys():
7212
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7213
        del self.op.hvparams[name]
7214
    # beparams
7215
    be_defs = cluster.SimpleFillBE({})
7216
    for name in self.op.beparams.keys():
7217
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7218
        del self.op.beparams[name]
7219
    # nic params
7220
    nic_defs = cluster.SimpleFillNIC({})
7221
    for nic in self.op.nics:
7222
      for name in constants.NICS_PARAMETERS:
7223
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7224
          del nic[name]
7225
    # osparams
7226
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7227
    for name in self.op.osparams.keys():
7228
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7229
        del self.op.osparams[name]
7230

    
7231
  def CheckPrereq(self):
7232
    """Check prerequisites.
7233

7234
    """
7235
    if self.op.mode == constants.INSTANCE_IMPORT:
7236
      export_info = self._ReadExportInfo()
7237
      self._ReadExportParams(export_info)
7238

    
7239
    if (not self.cfg.GetVGName() and
7240
        self.op.disk_template not in constants.DTS_NOT_LVM):
7241
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7242
                                 " instances", errors.ECODE_STATE)
7243

    
7244
    if self.op.hypervisor is None:
7245
      self.op.hypervisor = self.cfg.GetHypervisorType()
7246

    
7247
    cluster = self.cfg.GetClusterInfo()
7248
    enabled_hvs = cluster.enabled_hypervisors
7249
    if self.op.hypervisor not in enabled_hvs:
7250
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7251
                                 " cluster (%s)" % (self.op.hypervisor,
7252
                                  ",".join(enabled_hvs)),
7253
                                 errors.ECODE_STATE)
7254

    
7255
    # check hypervisor parameter syntax (locally)
7256
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7257
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7258
                                      self.op.hvparams)
7259
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7260
    hv_type.CheckParameterSyntax(filled_hvp)
7261
    self.hv_full = filled_hvp
7262
    # check that we don't specify global parameters on an instance
7263
    _CheckGlobalHvParams(self.op.hvparams)
7264

    
7265
    # fill and remember the beparams dict
7266
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7267
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7268

    
7269
    # build os parameters
7270
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7271

    
7272
    # now that hvp/bep are in final format, let's reset to defaults,
7273
    # if told to do so
7274
    if self.op.identify_defaults:
7275
      self._RevertToDefaults(cluster)
7276

    
7277
    # NIC buildup
7278
    self.nics = []
7279
    for idx, nic in enumerate(self.op.nics):
7280
      nic_mode_req = nic.get("mode", None)
7281
      nic_mode = nic_mode_req
7282
      if nic_mode is None:
7283
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7284

    
7285
      # in routed mode, for the first nic, the default ip is 'auto'
7286
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7287
        default_ip_mode = constants.VALUE_AUTO
7288
      else:
7289
        default_ip_mode = constants.VALUE_NONE
7290

    
7291
      # ip validity checks
7292
      ip = nic.get("ip", default_ip_mode)
7293
      if ip is None or ip.lower() == constants.VALUE_NONE:
7294
        nic_ip = None
7295
      elif ip.lower() == constants.VALUE_AUTO:
7296
        if not self.op.name_check:
7297
          raise errors.OpPrereqError("IP address set to auto but name checks"
7298
                                     " have been skipped",
7299
                                     errors.ECODE_INVAL)
7300
        nic_ip = self.hostname1.ip
7301
      else:
7302
        if not netutils.IPAddress.IsValid(ip):
7303
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7304
                                     errors.ECODE_INVAL)
7305
        nic_ip = ip
7306

    
7307
      # TODO: check the ip address for uniqueness
7308
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7309
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7310
                                   errors.ECODE_INVAL)
7311

    
7312
      # MAC address verification
7313
      mac = nic.get("mac", constants.VALUE_AUTO)
7314
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7315
        mac = utils.NormalizeAndValidateMac(mac)
7316

    
7317
        try:
7318
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7319
        except errors.ReservationError:
7320
          raise errors.OpPrereqError("MAC address %s already in use"
7321
                                     " in cluster" % mac,
7322
                                     errors.ECODE_NOTUNIQUE)
7323

    
7324
      # bridge verification
7325
      bridge = nic.get("bridge", None)
7326
      link = nic.get("link", None)
7327
      if bridge and link:
7328
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7329
                                   " at the same time", errors.ECODE_INVAL)
7330
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7331
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7332
                                   errors.ECODE_INVAL)
7333
      elif bridge:
7334
        link = bridge
7335

    
7336
      nicparams = {}
7337
      if nic_mode_req:
7338
        nicparams[constants.NIC_MODE] = nic_mode_req
7339
      if link:
7340
        nicparams[constants.NIC_LINK] = link
7341

    
7342
      check_params = cluster.SimpleFillNIC(nicparams)
7343
      objects.NIC.CheckParameterSyntax(check_params)
7344
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7345

    
7346
    # disk checks/pre-build
7347
    self.disks = []
7348
    for disk in self.op.disks:
7349
      mode = disk.get("mode", constants.DISK_RDWR)
7350
      if mode not in constants.DISK_ACCESS_SET:
7351
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7352
                                   mode, errors.ECODE_INVAL)
7353
      size = disk.get("size", None)
7354
      if size is None:
7355
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7356
      try:
7357
        size = int(size)
7358
      except (TypeError, ValueError):
7359
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7360
                                   errors.ECODE_INVAL)
7361
      vg = disk.get("vg", self.cfg.GetVGName())
7362
      new_disk = {"size": size, "mode": mode, "vg": vg}
7363
      if "adopt" in disk:
7364
        new_disk["adopt"] = disk["adopt"]
7365
      self.disks.append(new_disk)
7366

    
7367
    if self.op.mode == constants.INSTANCE_IMPORT:
7368

    
7369
      # Check that the new instance doesn't have less disks than the export
7370
      instance_disks = len(self.disks)
7371
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7372
      if instance_disks < export_disks:
7373
        raise errors.OpPrereqError("Not enough disks to import."
7374
                                   " (instance: %d, export: %d)" %
7375
                                   (instance_disks, export_disks),
7376
                                   errors.ECODE_INVAL)
7377

    
7378
      disk_images = []
7379
      for idx in range(export_disks):
7380
        option = 'disk%d_dump' % idx
7381
        if export_info.has_option(constants.INISECT_INS, option):
7382
          # FIXME: are the old OSes, disk sizes, etc. useful?
7383
          export_name = export_info.get(constants.INISECT_INS, option)
7384
          image = utils.PathJoin(self.op.src_path, export_name)
7385
          disk_images.append(image)
7386
        else:
7387
          disk_images.append(False)
7388

    
7389
      self.src_images = disk_images
7390

    
7391
      old_name = export_info.get(constants.INISECT_INS, 'name')
7392
      try:
7393
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7394
      except (TypeError, ValueError), err:
7395
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7396
                                   " an integer: %s" % str(err),
7397
                                   errors.ECODE_STATE)
7398
      if self.op.instance_name == old_name:
7399
        for idx, nic in enumerate(self.nics):
7400
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7401
            nic_mac_ini = 'nic%d_mac' % idx
7402
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7403

    
7404
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7405

    
7406
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7407
    if self.op.ip_check:
7408
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7409
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7410
                                   (self.check_ip, self.op.instance_name),
7411
                                   errors.ECODE_NOTUNIQUE)
7412

    
7413
    #### mac address generation
7414
    # By generating here the mac address both the allocator and the hooks get
7415
    # the real final mac address rather than the 'auto' or 'generate' value.
7416
    # There is a race condition between the generation and the instance object
7417
    # creation, which means that we know the mac is valid now, but we're not
7418
    # sure it will be when we actually add the instance. If things go bad
7419
    # adding the instance will abort because of a duplicate mac, and the
7420
    # creation job will fail.
7421
    for nic in self.nics:
7422
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7423
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7424

    
7425
    #### allocator run
7426

    
7427
    if self.op.iallocator is not None:
7428
      self._RunAllocator()
7429

    
7430
    #### node related checks
7431

    
7432
    # check primary node
7433
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7434
    assert self.pnode is not None, \
7435
      "Cannot retrieve locked node %s" % self.op.pnode
7436
    if pnode.offline:
7437
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7438
                                 pnode.name, errors.ECODE_STATE)
7439
    if pnode.drained:
7440
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7441
                                 pnode.name, errors.ECODE_STATE)
7442
    if not pnode.vm_capable:
7443
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7444
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7445

    
7446
    self.secondaries = []
7447

    
7448
    # mirror node verification
7449
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7450
      if self.op.snode == pnode.name:
7451
        raise errors.OpPrereqError("The secondary node cannot be the"
7452
                                   " primary node.", errors.ECODE_INVAL)
7453
      _CheckNodeOnline(self, self.op.snode)
7454
      _CheckNodeNotDrained(self, self.op.snode)
7455
      _CheckNodeVmCapable(self, self.op.snode)
7456
      self.secondaries.append(self.op.snode)
7457

    
7458
    nodenames = [pnode.name] + self.secondaries
7459

    
7460
    if not self.adopt_disks:
7461
      # Check lv size requirements, if not adopting
7462
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7463
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7464

    
7465
    else: # instead, we must check the adoption data
7466
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7467
      if len(all_lvs) != len(self.disks):
7468
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7469
                                   errors.ECODE_INVAL)
7470
      for lv_name in all_lvs:
7471
        try:
7472
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7473
          # to ReserveLV uses the same syntax
7474
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7475
        except errors.ReservationError:
7476
          raise errors.OpPrereqError("LV named %s used by another instance" %
7477
                                     lv_name, errors.ECODE_NOTUNIQUE)
7478

    
7479
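      # Adopted volumes must already exist on the primary node and must not
      # be online; check this by listing its volume groups and LVs.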
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7480
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7481

    
7482
      node_lvs = self.rpc.call_lv_list([pnode.name],
7483
                                       vg_names.payload.keys())[pnode.name]
7484
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7485
      node_lvs = node_lvs.payload
7486

    
7487
      delta = all_lvs.difference(node_lvs.keys())
7488
      if delta:
7489
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7490
                                   utils.CommaJoin(delta),
7491
                                   errors.ECODE_INVAL)
7492
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7493
      if online_lvs:
7494
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7495
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7496
                                   errors.ECODE_STATE)
7497
      # update the size of disk based on what is found
7498
      for dsk in self.disks:
7499
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7500

    
7501
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7502

    
7503
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7504
    # check OS parameters (remotely)
7505
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7506

    
7507
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7508

    
7509
    # memory check on primary node
7510
    if self.op.start:
7511
      _CheckNodeFreeMemory(self, self.pnode.name,
7512
                           "creating instance %s" % self.op.instance_name,
7513
                           self.be_full[constants.BE_MEMORY],
7514
                           self.op.hypervisor)
7515

    
7516
    self.dry_run_result = list(nodenames)
7517

    
7518
  def Exec(self, feedback_fn):
7519
    """Create and add the instance to the cluster.
7520

7521
    """
7522
    instance = self.op.instance_name
7523
    pnode_name = self.pnode.name
7524

    
7525
    ht_kind = self.op.hypervisor
7526
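    # Hypervisors in HTS_REQ_PORT need a TCP port reserved per instance
    # (typically for the graphical console); allocate one from the
    # cluster-wide pool, otherwise leave it unset.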
    if ht_kind in constants.HTS_REQ_PORT:
7527
      network_port = self.cfg.AllocatePort()
7528
    else:
7529
      network_port = None
7530

    
7531
    if constants.ENABLE_FILE_STORAGE:
7532
      # this is needed because os.path.join does not accept None arguments
7533
      if self.op.file_storage_dir is None:
7534
        string_file_storage_dir = ""
7535
      else:
7536
        string_file_storage_dir = self.op.file_storage_dir
7537

    
7538
      # build the full file storage dir path
7539
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7540
                                        string_file_storage_dir, instance)
7541
    else:
7542
      file_storage_dir = ""
7543

    
7544
    disks = _GenerateDiskTemplate(self,
7545
                                  self.op.disk_template,
7546
                                  instance, pnode_name,
7547
                                  self.secondaries,
7548
                                  self.disks,
7549
                                  file_storage_dir,
7550
                                  self.op.file_driver,
7551
                                  0,
7552
                                  feedback_fn)
7553

    
7554
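    # The instance object is created with admin_up=False; it is only marked
    # as up (and started) at the end of Exec if self.op.start is set.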
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7555
                            primary_node=pnode_name,
7556
                            nics=self.nics, disks=disks,
7557
                            disk_template=self.op.disk_template,
7558
                            admin_up=False,
7559
                            network_port=network_port,
7560
                            beparams=self.op.beparams,
7561
                            hvparams=self.op.hvparams,
7562
                            hypervisor=self.op.hypervisor,
7563
                            osparams=self.op.osparams,
7564
                            )
7565

    
7566
    if self.adopt_disks:
7567
      # rename LVs to the newly-generated names; we need to construct
7568
      # 'fake' LV disks with the old data, plus the new unique_id
7569
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7570
      rename_to = []
7571
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7572
        rename_to.append(t_dsk.logical_id)
7573
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7574
        self.cfg.SetDiskID(t_dsk, pnode_name)
7575
      result = self.rpc.call_blockdev_rename(pnode_name,
7576
                                             zip(tmp_disks, rename_to))
7577
      result.Raise("Failed to rename adoped LVs")
7578
    else:
7579
      feedback_fn("* creating instance disks...")
7580
      try:
7581
        _CreateDisks(self, iobj)
7582
      except errors.OpExecError:
7583
        self.LogWarning("Device creation failed, reverting...")
7584
        try:
7585
          _RemoveDisks(self, iobj)
7586
        finally:
7587
          self.cfg.ReleaseDRBDMinors(instance)
7588
          raise
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7591
        feedback_fn("* wiping instance disks...")
7592
        try:
7593
          _WipeDisks(self, iobj)
7594
        except errors.OpExecError:
7595
          self.LogWarning("Device wiping failed, reverting...")
7596
          try:
7597
            _RemoveDisks(self, iobj)
7598
          finally:
7599
            self.cfg.ReleaseDRBDMinors(instance)
7600
            raise
    feedback_fn("adding instance %s to cluster config" % instance)
7603

    
7604
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7605

    
7606
    # Declare that we don't want to remove the instance lock anymore, as we've
7607
    # added the instance to the config
7608
    del self.remove_locks[locking.LEVEL_INSTANCE]
7609
    # Unlock all the nodes
7610
    if self.op.mode == constants.INSTANCE_IMPORT:
7611
      nodes_keep = [self.op.src_node]
7612
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7613
                       if node != self.op.src_node]
7614
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7615
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7616
    else:
7617
      self.context.glm.release(locking.LEVEL_NODE)
7618
      del self.acquired_locks[locking.LEVEL_NODE]
7619

    
7620
    if self.op.wait_for_sync:
7621
      disk_abort = not _WaitForSync(self, iobj)
7622
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7623
      # make sure the disks are not degraded (still sync-ing is ok)
7624
      time.sleep(15)
7625
      feedback_fn("* checking mirrors status")
7626
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7627
    else:
7628
      disk_abort = False
7629

    
7630
    if disk_abort:
7631
      _RemoveDisks(self, iobj)
7632
      self.cfg.RemoveInstance(iobj.name)
7633
      # Make sure the instance lock gets removed
7634
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7635
      raise errors.OpExecError("There are some degraded disks for"
7636
                               " this instance")
7637

    
7638
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7639
      if self.op.mode == constants.INSTANCE_CREATE:
7640
        if not self.op.no_install:
7641
          feedback_fn("* running the instance OS create scripts...")
7642
          # FIXME: pass debug option from opcode to backend
7643
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7644
                                                 self.op.debug_level)
7645
          result.Raise("Could not add os for instance %s"
7646
                       " on node %s" % (instance, pnode_name))
7647

    
7648
      elif self.op.mode == constants.INSTANCE_IMPORT:
7649
        feedback_fn("* running the instance OS import scripts...")
7650

    
7651
        transfers = []
7652

    
7653
        for idx, image in enumerate(self.src_images):
7654
          if not image:
7655
            continue
7656

    
7657
          # FIXME: pass debug option from opcode to backend
7658
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7659
                                             constants.IEIO_FILE, (image, ),
7660
                                             constants.IEIO_SCRIPT,
7661
                                             (iobj.disks[idx], idx),
7662
                                             None)
7663
          transfers.append(dt)
7664

    
7665
        import_result = \
7666
          masterd.instance.TransferInstanceData(self, feedback_fn,
7667
                                                self.op.src_node, pnode_name,
7668
                                                self.pnode.secondary_ip,
7669
                                                iobj, transfers)
7670
        if not compat.all(import_result):
7671
          self.LogWarning("Some disks for instance %s on node %s were not"
7672
                          " imported successfully" % (instance, pnode_name))
7673

    
7674
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7675
        feedback_fn("* preparing remote import...")
7676
        # The source cluster will stop the instance before attempting to make a
7677
        # connection. In some cases stopping an instance can take a long time,
7678
        # hence the shutdown timeout is added to the connection timeout.
7679
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7680
                           self.op.source_shutdown_timeout)
7681
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7682

    
7683
        assert iobj.primary_node == self.pnode.name
7684
        disk_results = \
7685
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7686
                                        self.source_x509_ca,
7687
                                        self._cds, timeouts)
7688
        if not compat.all(disk_results):
7689
          # TODO: Should the instance still be started, even if some disks
7690
          # failed to import (valid for local imports, too)?
7691
          self.LogWarning("Some disks for instance %s on node %s were not"
7692
                          " imported successfully" % (instance, pnode_name))
7693

    
7694
        # Run rename script on newly imported instance
7695
        assert iobj.name == instance
7696
        feedback_fn("Running rename script for %s" % instance)
7697
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7698
                                                   self.source_instance_name,
7699
                                                   self.op.debug_level)
7700
        if result.fail_msg:
7701
          self.LogWarning("Failed to run rename script for %s on node"
7702
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7703

    
7704
      else:
7705
        # also checked in the prereq part
7706
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7707
                                     % self.op.mode)
7708

    
7709
    if self.op.start:
7710
      iobj.admin_up = True
7711
      self.cfg.Update(iobj, feedback_fn)
7712
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7713
      feedback_fn("* starting instance...")
7714
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7715
      result.Raise("Could not start instance")
7716

    
7717
    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
7721
  """Connect to an instance's console.
7722

7723
  This is somewhat special in that it returns the command line that
7724
  you need to run on the master node in order to connect to the
7725
  console.
7726

7727
  """
7728
  REQ_BGL = False
7729

    
7730
  def ExpandNames(self):
7731
    self._ExpandAndLockInstance()
7732

    
7733
  def CheckPrereq(self):
7734
    """Check prerequisites.
7735

7736
    This checks that the instance is in the cluster.
7737

7738
    """
7739
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7740
    assert self.instance is not None, \
7741
      "Cannot retrieve locked instance %s" % self.op.instance_name
7742
    _CheckNodeOnline(self, self.instance.primary_node)
7743

    
7744
  def Exec(self, feedback_fn):
7745
    """Connect to the console of an instance
7746

7747
    """
7748
    instance = self.instance
7749
    node = instance.primary_node
7750

    
7751
    node_insts = self.rpc.call_instance_list([node],
7752
                                             [instance.hypervisor])[node]
7753
    node_insts.Raise("Can't get node information from %s" % node)
7754

    
7755
    if instance.name not in node_insts.payload:
7756
      if instance.admin_up:
7757
        state = "ERROR_down"
7758
      else:
7759
        state = "ADMIN_down"
7760
      raise errors.OpExecError("Instance %s is not running (state %s)" %
7761
                               (instance.name, state))
7762

    
7763
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7764

    
7765
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7766
    cluster = self.cfg.GetClusterInfo()
7767
    # beparams and hvparams are passed separately, to avoid editing the
7768
    # instance and then saving the defaults in the instance itself.
7769
    hvparams = cluster.FillHV(instance)
7770
    beparams = cluster.FillBE(instance)
7771
    console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7772

    
7773
    assert console.instance == instance.name
7774
    assert console.Validate()
7775

    
7776
    return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
7780
  """Replace the disks of an instance.
7781

7782
  """
7783
  HPATH = "mirrors-replace"
7784
  HTYPE = constants.HTYPE_INSTANCE
7785
  REQ_BGL = False
7786

    
7787
  def CheckArguments(self):
7788
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7789
                                  self.op.iallocator)
7790

    
7791
  def ExpandNames(self):
7792
    self._ExpandAndLockInstance()
7793

    
7794
    if self.op.iallocator is not None:
7795
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7796

    
7797
    elif self.op.remote_node is not None:
7798
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7799
      self.op.remote_node = remote_node
7800

    
7801
      # Warning: do not remove the locking of the new secondary here
7802
      # unless DRBD8.AddChildren is changed to work in parallel;
7803
      # currently it doesn't since parallel invocations of
7804
      # FindUnusedMinor will conflict
7805
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7806
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7807

    
7808
    else:
7809
      self.needed_locks[locking.LEVEL_NODE] = []
7810
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7811

    
7812
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7813
                                   self.op.iallocator, self.op.remote_node,
7814
                                   self.op.disks, False, self.op.early_release)
7815

    
7816
    self.tasklets = [self.replacer]
7817

    
7818
  def DeclareLocks(self, level):
7819
    # If we're not already locking all nodes in the set we have to declare the
7820
    # instance's primary/secondary nodes.
7821
    if (level == locking.LEVEL_NODE and
7822
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7823
      self._LockInstancesNodes()
7824

    
7825
  def BuildHooksEnv(self):
7826
    """Build hooks env.
7827

7828
    This runs on the master, the primary and all the secondaries.
7829

7830
    """
7831
    instance = self.replacer.instance
7832
    env = {
7833
      "MODE": self.op.mode,
7834
      "NEW_SECONDARY": self.op.remote_node,
7835
      "OLD_SECONDARY": instance.secondary_nodes[0],
7836
      }
7837
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7838
    nl = [
7839
      self.cfg.GetMasterNode(),
7840
      instance.primary_node,
7841
      ]
7842
    if self.op.remote_node is not None:
7843
      nl.append(self.op.remote_node)
7844
    return env, nl, nl


class TLReplaceDisks(Tasklet):
7848
  """Replaces disks for an instance.
7849

7850
  Note: Locking is not within the scope of this class.
7851

7852
  """
7853
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7854
               disks, delay_iallocator, early_release):
7855
    """Initializes this class.
7856

7857
    """
7858
    Tasklet.__init__(self, lu)
7859

    
7860
    # Parameters
7861
    self.instance_name = instance_name
7862
    self.mode = mode
7863
    self.iallocator_name = iallocator_name
7864
    self.remote_node = remote_node
7865
    self.disks = disks
7866
    self.delay_iallocator = delay_iallocator
7867
    self.early_release = early_release
7868

    
7869
    # Runtime data
7870
    self.instance = None
7871
    self.new_node = None
7872
    self.target_node = None
7873
    self.other_node = None
7874
    self.remote_node_info = None
7875
    self.node_secondary_ip = None
7876

    
7877
  @staticmethod
7878
  def CheckArguments(mode, remote_node, iallocator):
7879
    """Helper function for users of this class.
7880

7881
    """
7882
    # check for valid parameter combination
7883
    if mode == constants.REPLACE_DISK_CHG:
7884
      if remote_node is None and iallocator is None:
7885
        raise errors.OpPrereqError("When changing the secondary either an"
7886
                                   " iallocator script must be used or the"
7887
                                   " new node given", errors.ECODE_INVAL)
7888

    
7889
      if remote_node is not None and iallocator is not None:
7890
        raise errors.OpPrereqError("Give either the iallocator or the new"
7891
                                   " secondary, not both", errors.ECODE_INVAL)
7892

    
7893
    elif remote_node is not None or iallocator is not None:
7894
      # Not replacing the secondary
7895
      raise errors.OpPrereqError("The iallocator and new node options can"
7896
                                 " only be used when changing the"
7897
                                 " secondary node", errors.ECODE_INVAL)
7898

    
7899
  @staticmethod
7900
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7901
    """Compute a new secondary node using an IAllocator.
7902

7903
    """
7904
    ial = IAllocator(lu.cfg, lu.rpc,
7905
                     mode=constants.IALLOCATOR_MODE_RELOC,
7906
                     name=instance_name,
7907
                     relocate_from=relocate_from)
7908

    
7909
    ial.Run(iallocator_name)
7910

    
7911
    if not ial.success:
7912
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7913
                                 " %s" % (iallocator_name, ial.info),
7914
                                 errors.ECODE_NORES)
7915

    
7916
    if len(ial.result) != ial.required_nodes:
7917
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7918
                                 " of nodes (%s), required %s" %
7919
                                 (iallocator_name,
7920
                                  len(ial.result), ial.required_nodes),
7921
                                 errors.ECODE_FAULT)
7922

    
7923
    remote_node_name = ial.result[0]
7924

    
7925
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7926
               instance_name, remote_node_name)
7927

    
7928
    return remote_node_name
7929

    
7930
  def _FindFaultyDisks(self, node_name):
7931
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7932
                                    node_name, True)
7933

    
7934
  def CheckPrereq(self):
7935
    """Check prerequisites.
7936

7937
    This checks that the instance is in the cluster.
7938

7939
    """
7940
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7941
    assert instance is not None, \
7942
      "Cannot retrieve locked instance %s" % self.instance_name
7943

    
7944
    if instance.disk_template != constants.DT_DRBD8:
7945
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7946
                                 " instances", errors.ECODE_INVAL)
7947

    
7948
    if len(instance.secondary_nodes) != 1:
7949
      raise errors.OpPrereqError("The instance has a strange layout,"
7950
                                 " expected one secondary but found %d" %
7951
                                 len(instance.secondary_nodes),
7952
                                 errors.ECODE_FAULT)
7953

    
7954
    if not self.delay_iallocator:
7955
      self._CheckPrereq2()
7956

    
7957
  def _CheckPrereq2(self):
7958
    """Check prerequisites, second part.
7959

7960
    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because, during node evacuation, the iallocator
    would otherwise only be called with an unmodified cluster model, not
    taking the planned changes into account.
7964

7965
    """
7966
    instance = self.instance
7967
    secondary_node = instance.secondary_nodes[0]
7968

    
7969
    if self.iallocator_name is None:
7970
      remote_node = self.remote_node
7971
    else:
7972
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7973
                                       instance.name, instance.secondary_nodes)
7974

    
7975
    if remote_node is not None:
7976
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7977
      assert self.remote_node_info is not None, \
7978
        "Cannot retrieve locked node %s" % remote_node
7979
    else:
7980
      self.remote_node_info = None
7981

    
7982
    if remote_node == self.instance.primary_node:
7983
      raise errors.OpPrereqError("The specified node is the primary node of"
7984
                                 " the instance.", errors.ECODE_INVAL)
7985

    
7986
    if remote_node == secondary_node:
7987
      raise errors.OpPrereqError("The specified node is already the"
7988
                                 " secondary node of the instance.",
7989
                                 errors.ECODE_INVAL)
7990

    
7991
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7992
                                    constants.REPLACE_DISK_CHG):
7993
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7994
                                 errors.ECODE_INVAL)
7995

    
7996
    if self.mode == constants.REPLACE_DISK_AUTO:
7997
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7998
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7999

    
8000
      if faulty_primary and faulty_secondary:
8001
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8002
                                   " one node and can not be repaired"
8003
                                   " automatically" % self.instance_name,
8004
                                   errors.ECODE_STATE)
8005

    
8006
      if faulty_primary:
8007
        self.disks = faulty_primary
8008
        self.target_node = instance.primary_node
8009
        self.other_node = secondary_node
8010
        check_nodes = [self.target_node, self.other_node]
8011
      elif faulty_secondary:
8012
        self.disks = faulty_secondary
8013
        self.target_node = secondary_node
8014
        self.other_node = instance.primary_node
8015
        check_nodes = [self.target_node, self.other_node]
8016
      else:
8017
        self.disks = []
8018
        check_nodes = []
8019

    
8020
    else:
8021
      # Non-automatic modes
8022
      if self.mode == constants.REPLACE_DISK_PRI:
8023
        self.target_node = instance.primary_node
8024
        self.other_node = secondary_node
8025
        check_nodes = [self.target_node, self.other_node]
8026

    
8027
      elif self.mode == constants.REPLACE_DISK_SEC:
8028
        self.target_node = secondary_node
8029
        self.other_node = instance.primary_node
8030
        check_nodes = [self.target_node, self.other_node]
8031

    
8032
      elif self.mode == constants.REPLACE_DISK_CHG:
8033
        self.new_node = remote_node
8034
        self.other_node = instance.primary_node
8035
        self.target_node = secondary_node
8036
        check_nodes = [self.new_node, self.other_node]
8037

    
8038
        _CheckNodeNotDrained(self.lu, remote_node)
8039
        _CheckNodeVmCapable(self.lu, remote_node)
8040

    
8041
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8042
        assert old_node_info is not None
8043
        if old_node_info.offline and not self.early_release:
8044
          # doesn't make sense to delay the release
8045
          self.early_release = True
8046
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8047
                          " early-release mode", secondary_node)
8048

    
8049
      else:
8050
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8051
                                     self.mode)
8052

    
8053
      # If not specified all disks should be replaced
8054
      if not self.disks:
8055
        self.disks = range(len(self.instance.disks))
8056

    
8057
    for node in check_nodes:
8058
      _CheckNodeOnline(self.lu, node)
8059

    
8060
    # Check whether disks are valid
8061
    for disk_idx in self.disks:
8062
      instance.FindDisk(disk_idx)
8063

    
8064
    # Get secondary node IP addresses
8065
    node_2nd_ip = {}
8066

    
8067
    for node_name in [self.target_node, self.other_node, self.new_node]:
8068
      if node_name is not None:
8069
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8070

    
8071
    self.node_secondary_ip = node_2nd_ip
8072

    
8073
  def Exec(self, feedback_fn):
8074
    """Execute disk replacement.
8075

8076
    This dispatches the disk replacement to the appropriate handler.
8077

8078
    """
8079
    if self.delay_iallocator:
8080
      self._CheckPrereq2()
8081

    
8082
    if not self.disks:
8083
      feedback_fn("No disks need replacement")
8084
      return
8085

    
8086
    feedback_fn("Replacing disk(s) %s for %s" %
8087
                (utils.CommaJoin(self.disks), self.instance.name))
    activate_disks = (not self.instance.admin_up)
    # Activate the instance disks if we're replacing them on a down instance
8092
    if activate_disks:
8093
      _StartInstanceDisks(self.lu, self.instance, True)
8094

    
8095
    try:
8096
      # Should we replace the secondary node?
8097
      if self.new_node is not None:
8098
        fn = self._ExecDrbd8Secondary
8099
      else:
8100
        fn = self._ExecDrbd8DiskOnly
8101

    
8102
      return fn(feedback_fn)
8103

    
8104
    finally:
8105
      # Deactivate the instance disks if we're replacing them on a
8106
      # down instance
8107
      if activate_disks:
8108
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8109

    
8110
  def _CheckVolumeGroup(self, nodes):
8111
    self.lu.LogInfo("Checking volume groups")
8112

    
8113
    vgname = self.cfg.GetVGName()
8114

    
8115
    # Make sure volume group exists on all involved nodes
8116
    results = self.rpc.call_vg_list(nodes)
8117
    if not results:
8118
      raise errors.OpExecError("Can't list volume groups on the nodes")
8119

    
8120
    for node in nodes:
8121
      res = results[node]
8122
      res.Raise("Error checking node %s" % node)
8123
      if vgname not in res.payload:
8124
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8125
                                 (vgname, node))
8126

    
8127
  def _CheckDisksExistence(self, nodes):
8128
    # Check disk existence
8129
    for idx, dev in enumerate(self.instance.disks):
8130
      if idx not in self.disks:
8131
        continue
8132

    
8133
      for node in nodes:
8134
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8135
        self.cfg.SetDiskID(dev, node)
8136

    
8137
        result = self.rpc.call_blockdev_find(node, dev)
8138

    
8139
        msg = result.fail_msg
8140
        if msg or not result.payload:
8141
          if not msg:
8142
            msg = "disk not found"
8143
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8144
                                   (idx, node, msg))
8145

    
8146
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
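    # With ldisk=True the check looks at the local disk state of the DRBD
    # device instead of only the overall (network) sync status.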
8147
    for idx, dev in enumerate(self.instance.disks):
8148
      if idx not in self.disks:
8149
        continue
8150

    
8151
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8152
                      (idx, node_name))
8153

    
8154
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8155
                                   ldisk=ldisk):
8156
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8157
                                 " replace disks for instance %s" %
8158
                                 (node_name, self.instance.name))
8159

    
8160
  def _CreateNewStorage(self, node_name):
8161
    vgname = self.cfg.GetVGName()
8162
    iv_names = {}
8163

    
8164
    for idx, dev in enumerate(self.instance.disks):
8165
      if idx not in self.disks:
8166
        continue
8167

    
8168
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8169

    
8170
      self.cfg.SetDiskID(dev, node_name)
8171

    
8172
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8173
      names = _GenerateUniqueNames(self.lu, lv_names)
8174

    
8175
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8176
                             logical_id=(vgname, names[0]))
8177
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8178
                             logical_id=(vgname, names[1]))
8179

    
8180
      new_lvs = [lv_data, lv_meta]
8181
      old_lvs = dev.children
8182
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8183

    
8184
      # we pass force_create=True to force the LVM creation
8185
      for new_lv in new_lvs:
8186
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8187
                        _GetInstanceInfoText(self.instance), False)
8188

    
8189
    return iv_names
8190

    
8191
  def _CheckDevices(self, node_name, iv_names):
8192
    for name, (dev, _, _) in iv_names.iteritems():
8193
      self.cfg.SetDiskID(dev, node_name)
8194

    
8195
      result = self.rpc.call_blockdev_find(node_name, dev)
8196

    
8197
      msg = result.fail_msg
8198
      if msg or not result.payload:
8199
        if not msg:
8200
          msg = "disk not found"
8201
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8202
                                 (name, msg))
8203

    
8204
      if result.payload.is_degraded:
8205
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8206

    
8207
  def _RemoveOldStorage(self, node_name, iv_names):
8208
    for name, (_, old_lvs, _) in iv_names.iteritems():
8209
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8210

    
8211
      for lv in old_lvs:
8212
        self.cfg.SetDiskID(lv, node_name)
8213

    
8214
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8215
        if msg:
8216
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8217
                             hint="remove unused LVs manually")
8218

    
8219
  def _ReleaseNodeLock(self, node_name):
8220
    """Releases the lock for a given node."""
8221
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8222

    
8223
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8224
    """Replace a disk on the primary or secondary for DRBD 8.
8225

8226
    The algorithm for replace is quite complicated:
8227

8228
      1. for each disk to be replaced:
8229

8230
        1. create new LVs on the target node with unique names
8231
        1. detach old LVs from the drbd device
8232
        1. rename old LVs to name_replaced.<time_t>
8233
        1. rename new LVs to old LVs
8234
        1. attach the new LVs (with the old names now) to the drbd device
8235

8236
      1. wait for sync across all devices
8237

8238
      1. for each modified disk:
8239

8240
        1. remove old LVs (which have the name name_replaced.<time_t>)
8241

8242
    Failures are not very well handled.
8243

8244
    """
8245
    steps_total = 6
8246

    
8247
    # Step: check device activation
8248
    self.lu.LogStep(1, steps_total, "Check device existence")
8249
    self._CheckDisksExistence([self.other_node, self.target_node])
8250
    self._CheckVolumeGroup([self.target_node, self.other_node])
8251

    
8252
    # Step: check other node consistency
8253
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8254
    self._CheckDisksConsistency(self.other_node,
8255
                                self.other_node == self.instance.primary_node,
8256
                                False)
8257

    
8258
    # Step: create new storage
8259
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8260
    iv_names = self._CreateNewStorage(self.target_node)
8261

    
8262
    # Step: for each lv, detach+rename*2+attach
8263
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8264
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8265
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8266

    
8267
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8268
                                                     old_lvs)
8269
      result.Raise("Can't detach drbd from local storage on node"
8270
                   " %s for device %s" % (self.target_node, dev.iv_name))
8271
      #dev.children = []
8272
      #cfg.Update(instance)
8273

    
8274
      # ok, we created the new LVs, so now we know we have the needed
8275
      # storage; as such, we proceed on the target node to rename
8276
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8277
      # using the assumption that logical_id == physical_id (which in
8278
      # turn is the unique_id on that node)
8279

    
8280
      # FIXME(iustin): use a better name for the replaced LVs
8281
      temp_suffix = int(time.time())
8282
      ren_fn = lambda d, suff: (d.physical_id[0],
8283
                                d.physical_id[1] + "_replaced-%s" % suff)
8284

    
8285
      # Build the rename list based on what LVs exist on the node
8286
      rename_old_to_new = []
8287
      for to_ren in old_lvs:
8288
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8289
        if not result.fail_msg and result.payload:
8290
          # device exists
8291
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8292

    
8293
      self.lu.LogInfo("Renaming the old LVs on the target node")
8294
      result = self.rpc.call_blockdev_rename(self.target_node,
8295
                                             rename_old_to_new)
8296
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8297

    
8298
      # Now we rename the new LVs to the old LVs
8299
      self.lu.LogInfo("Renaming the new LVs on the target node")
8300
      rename_new_to_old = [(new, old.physical_id)
8301
                           for old, new in zip(old_lvs, new_lvs)]
8302
      result = self.rpc.call_blockdev_rename(self.target_node,
8303
                                             rename_new_to_old)
8304
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8305

    
8306
      for old, new in zip(old_lvs, new_lvs):
8307
        new.logical_id = old.logical_id
8308
        self.cfg.SetDiskID(new, self.target_node)
8309

    
8310
      for disk in old_lvs:
8311
        disk.logical_id = ren_fn(disk, temp_suffix)
8312
        self.cfg.SetDiskID(disk, self.target_node)
8313

    
8314
      # Now that the new lvs have the old name, we can add them to the device
8315
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8316
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8317
                                                  new_lvs)
8318
      msg = result.fail_msg
8319
      if msg:
8320
        for new_lv in new_lvs:
8321
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8322
                                               new_lv).fail_msg
8323
          if msg2:
8324
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8325
                               hint=("cleanup manually the unused logical"
8326
                                     "volumes"))
8327
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8328

    
8329
      dev.children = new_lvs
8330

    
8331
      self.cfg.Update(self.instance, feedback_fn)
8332

    
8333
    cstep = 5
8334
    if self.early_release:
8335
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8336
      cstep += 1
8337
      self._RemoveOldStorage(self.target_node, iv_names)
8338
      # WARNING: we release both node locks here, do not do other RPCs
8339
      # than WaitForSync to the primary node
8340
      self._ReleaseNodeLock([self.target_node, self.other_node])
8341

    
8342
    # Wait for sync
8343
    # This can fail as the old devices are degraded and _WaitForSync
8344
    # does a combined result over all disks, so we don't check its return value
8345
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8346
    cstep += 1
8347
    _WaitForSync(self.lu, self.instance)
8348

    
8349
    # Check all devices manually
8350
    self._CheckDevices(self.instance.primary_node, iv_names)
8351

    
8352
    # Step: remove old storage
8353
    if not self.early_release:
8354
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8355
      cstep += 1
8356
      self._RemoveOldStorage(self.target_node, iv_names)
8357

    
8358
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.
8360

8361
    The algorithm for replace is quite complicated:
8362
      - for all disks of the instance:
8363
        - create new LVs on the new node with same names
8364
        - shutdown the drbd device on the old secondary
8365
        - disconnect the drbd network on the primary
8366
        - create the drbd device on the new secondary
8367
        - network attach the drbd on the primary, using an artifice:
8368
          the drbd code for Attach() will connect to the network if it
8369
          finds a device which is connected to the good local disks but
8370
          not network enabled
8371
      - wait for sync across all devices
8372
      - remove all disks from the old secondary
8373

8374
    Failures are not very well handled.
8375

8376
    """
    steps_total = 6
8378

    
8379
    # Step: check device activation
8380
    self.lu.LogStep(1, steps_total, "Check device existence")
8381
    self._CheckDisksExistence([self.instance.primary_node])
8382
    self._CheckVolumeGroup([self.instance.primary_node])
8383

    
8384
    # Step: check other node consistency
8385
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8386
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8387

    
8388
    # Step: create new storage
8389
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8390
    for idx, dev in enumerate(self.instance.disks):
8391
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8392
                      (self.new_node, idx))
8393
      # we pass force_create=True to force LVM creation
8394
      for new_lv in dev.children:
8395
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8396
                        _GetInstanceInfoText(self.instance), False)
8397

    
8398
    # Step 4: drbd minors and drbd setup changes
8399
    # after this, we must manually remove the drbd minors on both the
8400
    # error and the success paths
8401
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8402
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8403
                                         for dev in self.instance.disks],
8404
                                        self.instance.name)
8405
    logging.debug("Allocated minors %r", minors)
8406

    
8407
    iv_names = {}
8408
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8409
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8410
                      (self.new_node, idx))
8411
      # create new devices on new_node; note that we create two IDs:
8412
      # one without port, so the drbd will be activated without
8413
      # networking information on the new node at this stage, and one
8414
      # with network, for the latter activation in step 4
8415
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8416
      if self.instance.primary_node == o_node1:
8417
        p_minor = o_minor1
8418
      else:
8419
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8420
        p_minor = o_minor2
8421

    
8422
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8423
                      p_minor, new_minor, o_secret)
8424
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8425
                    p_minor, new_minor, o_secret)
8426

    
8427
      iv_names[idx] = (dev, dev.children, new_net_id)
8428
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8429
                    new_net_id)
8430
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8431
                              logical_id=new_alone_id,
8432
                              children=dev.children,
8433
                              size=dev.size)
8434
      try:
8435
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8436
                              _GetInstanceInfoText(self.instance), False)
8437
      except errors.GenericError:
8438
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8439
        raise
8440

    
8441
    # We have new devices, shutdown the drbd on the old secondary
8442
    for idx, dev in enumerate(self.instance.disks):
8443
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8444
      self.cfg.SetDiskID(dev, self.target_node)
8445
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8446
      if msg:
8447
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8448
                           "node: %s" % (idx, msg),
8449
                           hint=("Please cleanup this device manually as"
8450
                                 " soon as possible"))
8451

    
8452
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8453
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8454
                                               self.node_secondary_ip,
8455
                                               self.instance.disks)\
8456
                                              [self.instance.primary_node]
8457

    
8458
    msg = result.fail_msg
8459
    if msg:
8460
      # detaches didn't succeed (unlikely)
8461
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8462
      raise errors.OpExecError("Can't detach the disks from the network on"
8463
                               " old node: %s" % (msg,))
8464

    
8465
    # if we managed to detach at least one, we update all the disks of
8466
    # the instance to point to the new secondary
8467
    self.lu.LogInfo("Updating instance configuration")
8468
    for dev, _, new_logical_id in iv_names.itervalues():
8469
      dev.logical_id = new_logical_id
8470
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8471

    
8472
    self.cfg.Update(self.instance, feedback_fn)
8473

    
8474
    # and now perform the drbd attach
8475
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8476
                    " (standalone => connected)")
8477
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8478
                                            self.new_node],
8479
                                           self.node_secondary_ip,
8480
                                           self.instance.disks,
8481
                                           self.instance.name,
8482
                                           False)
8483
    for to_node, to_result in result.items():
8484
      msg = to_result.fail_msg
8485
      if msg:
8486
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8487
                           to_node, msg,
8488
                           hint=("please do a gnt-instance info to see the"
8489
                                 " status of disks"))
8490
    cstep = 5
8491
    if self.early_release:
8492
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8493
      cstep += 1
8494
      self._RemoveOldStorage(self.target_node, iv_names)
8495
      # WARNING: we release all node locks here, do not do other RPCs
8496
      # than WaitForSync to the primary node
8497
      self._ReleaseNodeLock([self.instance.primary_node,
8498
                             self.target_node,
8499
                             self.new_node])
8500

    
8501
    # Wait for sync
8502
    # This can fail as the old devices are degraded and _WaitForSync
8503
    # does a combined result over all disks, so we don't check its return value
8504
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8505
    cstep += 1
8506
    _WaitForSync(self.lu, self.instance)
8507

    
8508
    # Check all devices manually
8509
    self._CheckDevices(self.instance.primary_node, iv_names)
8510

    
8511
    # Step: remove old storage
8512
    if not self.early_release:
8513
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8514
      self._RemoveOldStorage(self.target_node, iv_names)
8515


    
8517
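# The double-rename trick used by _ExecDrbd8DiskOnly above is easiest to see
# on plain names: the old LVs are first moved out of the way under a
# timestamped "_replaced" suffix, then the freshly created LVs take over the
# old names.  The sketch below only builds the two rename lists for flat name
# lists; it is illustrative, and the helper name and tuple format are
# simplified stand-ins rather than part of the Ganeti code base.
def _ExampleBuildRenamePlan(old_lv_names, new_lv_names, now=None):
  """Return (old_to_temp, new_to_old) rename lists for an LV swap.

  Both arguments are equally long lists of LV names; the result mirrors the
  rename_old_to_new/rename_new_to_old lists computed in _ExecDrbd8DiskOnly.

  """
  suffix = int(now if now is not None else time.time())
  old_to_temp = [(old, "%s_replaced-%s" % (old, suffix))
                 for old in old_lv_names]
  new_to_old = list(zip(new_lv_names, old_lv_names))
  return (old_to_temp, new_to_old)

# For instance, _ExampleBuildRenamePlan(["disk0_data"], ["disk0_data.new"],
# now=1300000000) yields ([("disk0_data", "disk0_data_replaced-1300000000")],
# [("disk0_data.new", "disk0_data")]).

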
class LURepairNodeStorage(NoHooksLU):
8518
  """Repairs the volume group on a node.
8519

8520
  """
8521
  REQ_BGL = False
8522

    
8523
  def CheckArguments(self):
8524
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8525

    
8526
    storage_type = self.op.storage_type
8527

    
8528
    if (constants.SO_FIX_CONSISTENCY not in
8529
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8530
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8531
                                 " repaired" % storage_type,
8532
                                 errors.ECODE_INVAL)
8533

    
8534
  def ExpandNames(self):
8535
    self.needed_locks = {
8536
      locking.LEVEL_NODE: [self.op.node_name],
8537
      }
8538

    
8539
  def _CheckFaultyDisks(self, instance, node_name):
8540
    """Ensure faulty disks abort the opcode or at least warn."""
8541
    try:
8542
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8543
                                  node_name, True):
8544
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8545
                                   " node '%s'" % (instance.name, node_name),
8546
                                   errors.ECODE_STATE)
8547
    except errors.OpPrereqError, err:
8548
      if self.op.ignore_consistency:
8549
        self.proc.LogWarning(str(err.args[0]))
8550
      else:
8551
        raise
8552

    
8553
  def CheckPrereq(self):
8554
    """Check prerequisites.
8555

8556
    """
8557
    # Check whether any instance on this node has faulty disks
8558
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8559
      if not inst.admin_up:
8560
        continue
8561
      check_nodes = set(inst.all_nodes)
8562
      check_nodes.discard(self.op.node_name)
8563
      for inst_node_name in check_nodes:
8564
        self._CheckFaultyDisks(inst, inst_node_name)
8565

    
8566
  def Exec(self, feedback_fn):
8567
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8568
                (self.op.name, self.op.node_name))
8569

    
8570
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8571
    result = self.rpc.call_storage_execute(self.op.node_name,
8572
                                           self.op.storage_type, st_args,
8573
                                           self.op.name,
8574
                                           constants.SO_FIX_CONSISTENCY)
8575
    result.Raise("Failed to repair storage unit '%s' on %s" %
8576
                 (self.op.name, self.op.node_name))
8577

    
8578

    
8579
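# LURepairNodeStorage.CheckPrereq above refuses to repair storage on a node
# while any instance living on it has faulty disks on one of its *other*
# nodes.  The node-set computation is the subtle part, so here it is again as
# a standalone sketch operating on plain data; the layout used below
# (instance name -> list of nodes) is invented for the example and is not
# Ganeti's object model.
def _ExampleNodesToCheck(instances, repaired_node):
  """Map instance name to the nodes whose disks must be healthy first."""
  result = {}
  for name, all_nodes in instances.items():
    if repaired_node not in all_nodes:
      continue
    check_nodes = set(all_nodes)
    check_nodes.discard(repaired_node)
    result[name] = sorted(check_nodes)
  return result

# _ExampleNodesToCheck({"web1": ["node1", "node2"]}, "node1") returns
# {"web1": ["node2"]}: only node2's copy of web1's disks is checked, since
# node1 is the one being repaired.

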
class LUNodeEvacStrategy(NoHooksLU):
8580
  """Computes the node evacuation strategy.
8581

8582
  """
8583
  REQ_BGL = False
8584

    
8585
  def CheckArguments(self):
8586
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8587

    
8588
  def ExpandNames(self):
8589
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8590
    self.needed_locks = locks = {}
8591
    if self.op.remote_node is None:
8592
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8593
    else:
8594
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8595
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8596

    
8597
  def Exec(self, feedback_fn):
8598
    if self.op.remote_node is not None:
8599
      instances = []
8600
      for node in self.op.nodes:
8601
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8602
      result = []
8603
      for i in instances:
8604
        if i.primary_node == self.op.remote_node:
8605
          raise errors.OpPrereqError("Node %s is the primary node of"
8606
                                     " instance %s, cannot use it as"
8607
                                     " secondary" %
8608
                                     (self.op.remote_node, i.name),
8609
                                     errors.ECODE_INVAL)
8610
        result.append([i.name, self.op.remote_node])
8611
    else:
8612
      ial = IAllocator(self.cfg, self.rpc,
8613
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8614
                       evac_nodes=self.op.nodes)
8615
      ial.Run(self.op.iallocator, validate=True)
8616
      if not ial.success:
8617
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8618
                                 errors.ECODE_NORES)
8619
      result = ial.result
8620
    return result
8621

    
8622

    
8623
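# When a remote node is given, LUNodeEvacStrategy.Exec above simply pairs
# every secondary instance of the evacuated nodes with that node and rejects
# instances whose primary already is the target.  Below is a hedged,
# standalone version of that pairing, using (name, primary_node) tuples
# instead of configuration objects.
def _ExampleEvacPairs(secondary_instances, remote_node):
  """Return [[instance_name, remote_node], ...] or raise ValueError."""
  result = []
  for name, primary in secondary_instances:
    if primary == remote_node:
      raise ValueError("Node %s is the primary node of instance %s" %
                       (remote_node, name))
    result.append([name, remote_node])
  return result

# _ExampleEvacPairs([("inst1", "node3")], "node2") == [["inst1", "node2"]],
# matching the [instance, new_secondary] rows returned by the LU; the
# iallocator branch produces the same shape via IALLOCATOR_MODE_MEVAC.

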
class LUInstanceGrowDisk(LogicalUnit):
8624
  """Grow a disk of an instance.
8625

8626
  """
8627
  HPATH = "disk-grow"
8628
  HTYPE = constants.HTYPE_INSTANCE
8629
  REQ_BGL = False
8630

    
8631
  def ExpandNames(self):
8632
    self._ExpandAndLockInstance()
8633
    self.needed_locks[locking.LEVEL_NODE] = []
8634
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8635

    
8636
  def DeclareLocks(self, level):
8637
    if level == locking.LEVEL_NODE:
8638
      self._LockInstancesNodes()
8639

    
8640
  def BuildHooksEnv(self):
8641
    """Build hooks env.
8642

8643
    This runs on the master, the primary and all the secondaries.
8644

8645
    """
8646
    env = {
8647
      "DISK": self.op.disk,
8648
      "AMOUNT": self.op.amount,
8649
      }
8650
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8651
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8652
    return env, nl, nl
8653

    
8654
  def CheckPrereq(self):
8655
    """Check prerequisites.
8656

8657
    This checks that the instance is in the cluster.
8658

8659
    """
8660
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8661
    assert instance is not None, \
8662
      "Cannot retrieve locked instance %s" % self.op.instance_name
8663
    nodenames = list(instance.all_nodes)
8664
    for node in nodenames:
8665
      _CheckNodeOnline(self, node)
8666

    
8667
    self.instance = instance
8668

    
8669
    if instance.disk_template not in constants.DTS_GROWABLE:
8670
      raise errors.OpPrereqError("Instance's disk layout does not support"
8671
                                 " growing.", errors.ECODE_INVAL)
8672

    
8673
    self.disk = instance.FindDisk(self.op.disk)
8674

    
8675
    if instance.disk_template != constants.DT_FILE:
8676
      # TODO: check the free disk space for file, when that feature
8677
      # will be supported
8678
      _CheckNodesFreeDiskPerVG(self, nodenames,
8679
                               self.disk.ComputeGrowth(self.op.amount))
8680

    
8681
  def Exec(self, feedback_fn):
8682
    """Execute disk grow.
8683

8684
    """
8685
    instance = self.instance
8686
    disk = self.disk
8687

    
8688
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8689
    if not disks_ok:
8690
      raise errors.OpExecError("Cannot activate block device to grow")
8691

    
8692
    for node in instance.all_nodes:
8693
      self.cfg.SetDiskID(disk, node)
8694
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8695
      result.Raise("Grow request failed to node %s" % node)
8696

    
8697
      # TODO: Rewrite code to work properly
8698
      # DRBD goes into sync mode for a short amount of time after executing the
8699
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8700
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8701
      # time is a work-around.
8702
      time.sleep(5)
8703

    
8704
    disk.RecordGrow(self.op.amount)
8705
    self.cfg.Update(instance, feedback_fn)
8706
    if self.op.wait_for_sync:
8707
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8708
      if disk_abort:
8709
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8710
                             " status.\nPlease check the instance.")
8711
      if not instance.admin_up:
8712
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8713
    elif not instance.admin_up:
8714
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8715
                           " not supposed to be running because no wait for"
8716
                           " sync mode was requested.")
8717

    
8718

    
8719
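# LUInstanceGrowDisk.Exec above grows the device on every node before the new
# size is recorded in the configuration, so a partial failure leaves the
# stored size at the old, safe value.  A minimal sketch of that ordering,
# with a caller-supplied callback standing in for the blockdev_grow RPC:
def _ExampleGrowDisk(nodes, current_size, amount, grow_on_node):
  """Grow on all nodes, then return the new size to record.

  grow_on_node(node, amount) must raise on failure; the size is only
  increased after every node succeeded, mirroring disk.RecordGrow() above.

  """
  for node in nodes:
    grow_on_node(node, amount)
  return current_size + amount

# _ExampleGrowDisk(["node1", "node2"], 10240, 1024, lambda n, a: None)
# returns 11264 (MB); if the callback raises for any node, the old size is
# what stays recorded.

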
class LUInstanceQueryData(NoHooksLU):
8720
  """Query runtime instance data.
8721

8722
  """
8723
  REQ_BGL = False
8724

    
8725
  def ExpandNames(self):
8726
    self.needed_locks = {}
8727
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8728

    
8729
    if self.op.instances:
8730
      self.wanted_names = []
8731
      for name in self.op.instances:
8732
        full_name = _ExpandInstanceName(self.cfg, name)
8733
        self.wanted_names.append(full_name)
8734
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8735
    else:
8736
      self.wanted_names = None
8737
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8738

    
8739
    self.needed_locks[locking.LEVEL_NODE] = []
8740
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8741

    
8742
  def DeclareLocks(self, level):
8743
    if level == locking.LEVEL_NODE:
8744
      self._LockInstancesNodes()
8745

    
8746
  def CheckPrereq(self):
8747
    """Check prerequisites.
8748

8749
    This only checks the optional instance list against the existing names.
8750

8751
    """
8752
    if self.wanted_names is None:
8753
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8754

    
8755
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8756
                             in self.wanted_names]
8757

    
8758
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8759
    """Returns the status of a block device
8760

8761
    """
8762
    if self.op.static or not node:
8763
      return None
8764

    
8765
    self.cfg.SetDiskID(dev, node)
8766

    
8767
    result = self.rpc.call_blockdev_find(node, dev)
8768
    if result.offline:
8769
      return None
8770

    
8771
    result.Raise("Can't compute disk status for %s" % instance_name)
8772

    
8773
    status = result.payload
8774
    if status is None:
8775
      return None
8776

    
8777
    return (status.dev_path, status.major, status.minor,
8778
            status.sync_percent, status.estimated_time,
8779
            status.is_degraded, status.ldisk_status)
8780

    
8781
  def _ComputeDiskStatus(self, instance, snode, dev):
8782
    """Compute block device status.
8783

8784
    """
8785
    if dev.dev_type in constants.LDS_DRBD:
8786
      # we change the snode then (otherwise we use the one passed in)
8787
      if dev.logical_id[0] == instance.primary_node:
8788
        snode = dev.logical_id[1]
8789
      else:
8790
        snode = dev.logical_id[0]
8791

    
8792
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8793
                                              instance.name, dev)
8794
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8795

    
8796
    if dev.children:
8797
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8798
                      for child in dev.children]
8799
    else:
8800
      dev_children = []
8801

    
8802
    data = {
8803
      "iv_name": dev.iv_name,
8804
      "dev_type": dev.dev_type,
8805
      "logical_id": dev.logical_id,
8806
      "physical_id": dev.physical_id,
8807
      "pstatus": dev_pstatus,
8808
      "sstatus": dev_sstatus,
8809
      "children": dev_children,
8810
      "mode": dev.mode,
8811
      "size": dev.size,
8812
      }
8813

    
8814
    return data
8815

    
8816
  def Exec(self, feedback_fn):
8817
    """Gather and return data"""
8818
    result = {}
8819

    
8820
    cluster = self.cfg.GetClusterInfo()
8821

    
8822
    for instance in self.wanted_instances:
8823
      if not self.op.static:
8824
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8825
                                                  instance.name,
8826
                                                  instance.hypervisor)
8827
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8828
        remote_info = remote_info.payload
8829
        if remote_info and "state" in remote_info:
8830
          remote_state = "up"
8831
        else:
8832
          remote_state = "down"
8833
      else:
8834
        remote_state = None
8835
      if instance.admin_up:
8836
        config_state = "up"
8837
      else:
8838
        config_state = "down"
8839

    
8840
      disks = [self._ComputeDiskStatus(instance, None, device)
8841
               for device in instance.disks]
8842

    
8843
      idict = {
8844
        "name": instance.name,
8845
        "config_state": config_state,
8846
        "run_state": remote_state,
8847
        "pnode": instance.primary_node,
8848
        "snodes": instance.secondary_nodes,
8849
        "os": instance.os,
8850
        # this happens to be the same format used for hooks
8851
        "nics": _NICListToTuple(self, instance.nics),
8852
        "disk_template": instance.disk_template,
8853
        "disks": disks,
8854
        "hypervisor": instance.hypervisor,
8855
        "network_port": instance.network_port,
8856
        "hv_instance": instance.hvparams,
8857
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8858
        "be_instance": instance.beparams,
8859
        "be_actual": cluster.FillBE(instance),
8860
        "os_instance": instance.osparams,
8861
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8862
        "serial_no": instance.serial_no,
8863
        "mtime": instance.mtime,
8864
        "ctime": instance.ctime,
8865
        "uuid": instance.uuid,
8866
        }
8867

    
8868
      result[instance.name] = idict
8869

    
8870
    return result
8871

    
8872

    
8873
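# LUInstanceQueryData._ComputeDiskStatus above returns one dict per disk and
# recurses into dev.children, so the "disks" entries mirror the DRBD/LV tree.
# The sketch below reproduces only that recursion shape on a toy disk class;
# _ExampleDisk is a stand-in for objects.Disk, not the real object.
class _ExampleDisk(object):
  def __init__(self, iv_name, size, children=None):
    self.iv_name = iv_name
    self.size = size
    self.children = children or []


def _ExampleDiskTree(dev):
  """Return the nested iv_name/size/children structure for one disk."""
  return {
    "iv_name": dev.iv_name,
    "size": dev.size,
    "children": [_ExampleDiskTree(child) for child in dev.children],
    }

# _ExampleDiskTree(_ExampleDisk("disk/0", 1024, [_ExampleDisk("data", 1024),
#                                                _ExampleDisk("meta", 128)]))
# yields a nested dict whose shape matches the per-disk dicts built above
# (minus the runtime status fields, which need the blockdev_find RPC).

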
class LUInstanceSetParams(LogicalUnit):
8874
  """Modifies an instances's parameters.
8875

8876
  """
8877
  HPATH = "instance-modify"
8878
  HTYPE = constants.HTYPE_INSTANCE
8879
  REQ_BGL = False
8880

    
8881
  def CheckArguments(self):
8882
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8883
            self.op.hvparams or self.op.beparams or self.op.os_name):
8884
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8885

    
8886
    if self.op.hvparams:
8887
      _CheckGlobalHvParams(self.op.hvparams)
8888

    
8889
    # Disk validation
8890
    disk_addremove = 0
8891
    for disk_op, disk_dict in self.op.disks:
8892
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8893
      if disk_op == constants.DDM_REMOVE:
8894
        disk_addremove += 1
8895
        continue
8896
      elif disk_op == constants.DDM_ADD:
8897
        disk_addremove += 1
8898
      else:
8899
        if not isinstance(disk_op, int):
8900
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8901
        if not isinstance(disk_dict, dict):
8902
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8903
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8904

    
8905
      if disk_op == constants.DDM_ADD:
8906
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8907
        if mode not in constants.DISK_ACCESS_SET:
8908
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8909
                                     errors.ECODE_INVAL)
8910
        size = disk_dict.get('size', None)
8911
        if size is None:
8912
          raise errors.OpPrereqError("Required disk parameter size missing",
8913
                                     errors.ECODE_INVAL)
8914
        try:
8915
          size = int(size)
8916
        except (TypeError, ValueError), err:
8917
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8918
                                     str(err), errors.ECODE_INVAL)
8919
        disk_dict['size'] = size
8920
      else:
8921
        # modification of disk
8922
        if 'size' in disk_dict:
8923
          raise errors.OpPrereqError("Disk size change not possible, use"
8924
                                     " grow-disk", errors.ECODE_INVAL)
8925

    
8926
    if disk_addremove > 1:
8927
      raise errors.OpPrereqError("Only one disk add or remove operation"
8928
                                 " supported at a time", errors.ECODE_INVAL)
8929

    
8930
    if self.op.disks and self.op.disk_template is not None:
8931
      raise errors.OpPrereqError("Disk template conversion and other disk"
8932
                                 " changes not supported at the same time",
8933
                                 errors.ECODE_INVAL)
8934

    
8935
    if (self.op.disk_template and
8936
        self.op.disk_template in constants.DTS_NET_MIRROR and
8937
        self.op.remote_node is None):
8938
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
8939
                                 " one requires specifying a secondary node",
8940
                                 errors.ECODE_INVAL)
8941

    
8942
    # NIC validation
8943
    nic_addremove = 0
8944
    for nic_op, nic_dict in self.op.nics:
8945
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8946
      if nic_op == constants.DDM_REMOVE:
8947
        nic_addremove += 1
8948
        continue
8949
      elif nic_op == constants.DDM_ADD:
8950
        nic_addremove += 1
8951
      else:
8952
        if not isinstance(nic_op, int):
8953
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8954
        if not isinstance(nic_dict, dict):
8955
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8956
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8957

    
8958
      # nic_dict should be a dict
8959
      nic_ip = nic_dict.get('ip', None)
8960
      if nic_ip is not None:
8961
        if nic_ip.lower() == constants.VALUE_NONE:
8962
          nic_dict['ip'] = None
8963
        else:
8964
          if not netutils.IPAddress.IsValid(nic_ip):
8965
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8966
                                       errors.ECODE_INVAL)
8967

    
8968
      nic_bridge = nic_dict.get('bridge', None)
8969
      nic_link = nic_dict.get('link', None)
8970
      if nic_bridge and nic_link:
8971
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8972
                                   " at the same time", errors.ECODE_INVAL)
8973
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8974
        nic_dict['bridge'] = None
8975
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8976
        nic_dict['link'] = None
8977

    
8978
      if nic_op == constants.DDM_ADD:
8979
        nic_mac = nic_dict.get('mac', None)
8980
        if nic_mac is None:
8981
          nic_dict['mac'] = constants.VALUE_AUTO
8982

    
8983
      if 'mac' in nic_dict:
8984
        nic_mac = nic_dict['mac']
8985
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8986
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8987

    
8988
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8989
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8990
                                     " modifying an existing nic",
8991
                                     errors.ECODE_INVAL)
8992

    
8993
    if nic_addremove > 1:
8994
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8995
                                 " supported at a time", errors.ECODE_INVAL)
8996

    
8997
  def ExpandNames(self):
8998
    self._ExpandAndLockInstance()
8999
    self.needed_locks[locking.LEVEL_NODE] = []
9000
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9001

    
9002
  def DeclareLocks(self, level):
9003
    if level == locking.LEVEL_NODE:
9004
      self._LockInstancesNodes()
9005
      if self.op.disk_template and self.op.remote_node:
9006
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9007
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9008

    
9009
  def BuildHooksEnv(self):
9010
    """Build hooks env.
9011

9012
    This runs on the master, primary and secondaries.
9013

9014
    """
9015
    args = dict()
9016
    if constants.BE_MEMORY in self.be_new:
9017
      args['memory'] = self.be_new[constants.BE_MEMORY]
9018
    if constants.BE_VCPUS in self.be_new:
9019
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9020
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9021
    # information at all.
9022
    if self.op.nics:
9023
      args['nics'] = []
9024
      nic_override = dict(self.op.nics)
9025
      for idx, nic in enumerate(self.instance.nics):
9026
        if idx in nic_override:
9027
          this_nic_override = nic_override[idx]
9028
        else:
9029
          this_nic_override = {}
9030
        if 'ip' in this_nic_override:
9031
          ip = this_nic_override['ip']
9032
        else:
9033
          ip = nic.ip
9034
        if 'mac' in this_nic_override:
9035
          mac = this_nic_override['mac']
9036
        else:
9037
          mac = nic.mac
9038
        if idx in self.nic_pnew:
9039
          nicparams = self.nic_pnew[idx]
9040
        else:
9041
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9042
        mode = nicparams[constants.NIC_MODE]
9043
        link = nicparams[constants.NIC_LINK]
9044
        args['nics'].append((ip, mac, mode, link))
9045
      if constants.DDM_ADD in nic_override:
9046
        ip = nic_override[constants.DDM_ADD].get('ip', None)
9047
        mac = nic_override[constants.DDM_ADD]['mac']
9048
        nicparams = self.nic_pnew[constants.DDM_ADD]
9049
        mode = nicparams[constants.NIC_MODE]
9050
        link = nicparams[constants.NIC_LINK]
9051
        args['nics'].append((ip, mac, mode, link))
9052
      elif constants.DDM_REMOVE in nic_override:
9053
        del args['nics'][-1]
9054

    
9055
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9056
    if self.op.disk_template:
9057
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9058
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9059
    return env, nl, nl
9060

    
9061
  def CheckPrereq(self):
9062
    """Check prerequisites.
9063

9064
    This only checks the instance list against the existing names.
9065

9066
    """
9067
    # checking the new params on the primary/secondary nodes
9068

    
9069
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9070
    cluster = self.cluster = self.cfg.GetClusterInfo()
9071
    assert self.instance is not None, \
9072
      "Cannot retrieve locked instance %s" % self.op.instance_name
9073
    pnode = instance.primary_node
9074
    nodelist = list(instance.all_nodes)
9075

    
9076
    # OS change
9077
    if self.op.os_name and not self.op.force:
9078
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9079
                      self.op.force_variant)
9080
      instance_os = self.op.os_name
9081
    else:
9082
      instance_os = instance.os
9083

    
9084
    if self.op.disk_template:
9085
      if instance.disk_template == self.op.disk_template:
9086
        raise errors.OpPrereqError("Instance already has disk template %s" %
9087
                                   instance.disk_template, errors.ECODE_INVAL)
9088

    
9089
      if (instance.disk_template,
9090
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9091
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9092
                                   " %s to %s" % (instance.disk_template,
9093
                                                  self.op.disk_template),
9094
                                   errors.ECODE_INVAL)
9095
      _CheckInstanceDown(self, instance, "cannot change disk template")
9096
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9097
        if self.op.remote_node == pnode:
9098
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9099
                                     " as the primary node of the instance" %
9100
                                     self.op.remote_node, errors.ECODE_STATE)
9101
        _CheckNodeOnline(self, self.op.remote_node)
9102
        _CheckNodeNotDrained(self, self.op.remote_node)
9103
        # FIXME: here we assume that the old instance type is DT_PLAIN
9104
        assert instance.disk_template == constants.DT_PLAIN
9105
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9106
                 for d in instance.disks]
9107
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9108
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9109

    
9110
    # hvparams processing
9111
    if self.op.hvparams:
9112
      hv_type = instance.hypervisor
9113
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9114
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9115
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9116

    
9117
      # local check
9118
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9119
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9120
      self.hv_new = hv_new # the new actual values
9121
      self.hv_inst = i_hvdict # the new dict (without defaults)
9122
    else:
9123
      self.hv_new = self.hv_inst = {}
9124

    
9125
    # beparams processing
9126
    if self.op.beparams:
9127
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9128
                                   use_none=True)
9129
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9130
      be_new = cluster.SimpleFillBE(i_bedict)
9131
      self.be_new = be_new # the new actual values
9132
      self.be_inst = i_bedict # the new dict (without defaults)
9133
    else:
9134
      self.be_new = self.be_inst = {}
9135

    
9136
    # osparams processing
9137
    if self.op.osparams:
9138
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9139
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9140
      self.os_inst = i_osdict # the new dict (without defaults)
9141
    else:
9142
      self.os_inst = {}
9143

    
9144
    self.warn = []
9145

    
9146
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9147
      mem_check_list = [pnode]
9148
      if be_new[constants.BE_AUTO_BALANCE]:
9149
        # either we changed auto_balance to yes or it was from before
9150
        mem_check_list.extend(instance.secondary_nodes)
9151
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9152
                                                  instance.hypervisor)
9153
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9154
                                         instance.hypervisor)
9155
      pninfo = nodeinfo[pnode]
9156
      msg = pninfo.fail_msg
9157
      if msg:
9158
        # Assume the primary node is unreachable and go ahead
9159
        self.warn.append("Can't get info from primary node %s: %s" %
9160
                         (pnode,  msg))
9161
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9162
        self.warn.append("Node data from primary node %s doesn't contain"
9163
                         " free memory information" % pnode)
9164
      elif instance_info.fail_msg:
9165
        self.warn.append("Can't get instance runtime information: %s" %
9166
                        instance_info.fail_msg)
9167
      else:
9168
        if instance_info.payload:
9169
          current_mem = int(instance_info.payload['memory'])
9170
        else:
9171
          # Assume instance not running
9172
          # (there is a slight race condition here, but it's not very probable,
9173
          # and we have no other way to check)
9174
          current_mem = 0
9175
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9176
                    pninfo.payload['memory_free'])
9177
        if miss_mem > 0:
9178
          raise errors.OpPrereqError("This change will prevent the instance"
9179
                                     " from starting, due to %d MB of memory"
9180
                                     " missing on its primary node" % miss_mem,
9181
                                     errors.ECODE_NORES)
9182

    
9183
      if be_new[constants.BE_AUTO_BALANCE]:
9184
        for node, nres in nodeinfo.items():
9185
          if node not in instance.secondary_nodes:
9186
            continue
9187
          msg = nres.fail_msg
9188
          if msg:
9189
            self.warn.append("Can't get info from secondary node %s: %s" %
9190
                             (node, msg))
9191
          elif not isinstance(nres.payload.get('memory_free', None), int):
9192
            self.warn.append("Secondary node %s didn't return free"
9193
                             " memory information" % node)
9194
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9195
            self.warn.append("Not enough memory to failover instance to"
9196
                             " secondary node %s" % node)
9197

    
9198
    # NIC processing
9199
    self.nic_pnew = {}
9200
    self.nic_pinst = {}
9201
    for nic_op, nic_dict in self.op.nics:
9202
      if nic_op == constants.DDM_REMOVE:
9203
        if not instance.nics:
9204
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9205
                                     errors.ECODE_INVAL)
9206
        continue
9207
      if nic_op != constants.DDM_ADD:
9208
        # an existing nic
9209
        if not instance.nics:
9210
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9211
                                     " no NICs" % nic_op,
9212
                                     errors.ECODE_INVAL)
9213
        if nic_op < 0 or nic_op >= len(instance.nics):
9214
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9215
                                     " are 0 to %d" %
9216
                                     (nic_op, len(instance.nics) - 1),
9217
                                     errors.ECODE_INVAL)
9218
        old_nic_params = instance.nics[nic_op].nicparams
9219
        old_nic_ip = instance.nics[nic_op].ip
9220
      else:
9221
        old_nic_params = {}
9222
        old_nic_ip = None
9223

    
9224
      update_params_dict = dict([(key, nic_dict[key])
9225
                                 for key in constants.NICS_PARAMETERS
9226
                                 if key in nic_dict])
9227

    
9228
      if 'bridge' in nic_dict:
9229
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9230

    
9231
      new_nic_params = _GetUpdatedParams(old_nic_params,
9232
                                         update_params_dict)
9233
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9234
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9235
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9236
      self.nic_pinst[nic_op] = new_nic_params
9237
      self.nic_pnew[nic_op] = new_filled_nic_params
9238
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9239

    
9240
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9241
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9242
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9243
        if msg:
9244
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9245
          if self.op.force:
9246
            self.warn.append(msg)
9247
          else:
9248
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9249
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9250
        if 'ip' in nic_dict:
9251
          nic_ip = nic_dict['ip']
9252
        else:
9253
          nic_ip = old_nic_ip
9254
        if nic_ip is None:
9255
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9256
                                     ' on a routed nic', errors.ECODE_INVAL)
9257
      if 'mac' in nic_dict:
9258
        nic_mac = nic_dict['mac']
9259
        if nic_mac is None:
9260
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9261
                                     errors.ECODE_INVAL)
9262
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9263
          # otherwise generate the mac
9264
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9265
        else:
9266
          # or validate/reserve the current one
9267
          try:
9268
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9269
          except errors.ReservationError:
9270
            raise errors.OpPrereqError("MAC address %s already in use"
9271
                                       " in cluster" % nic_mac,
9272
                                       errors.ECODE_NOTUNIQUE)
9273

    
9274
    # DISK processing
9275
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9276
      raise errors.OpPrereqError("Disk operations not supported for"
9277
                                 " diskless instances",
9278
                                 errors.ECODE_INVAL)
9279
    for disk_op, _ in self.op.disks:
9280
      if disk_op == constants.DDM_REMOVE:
9281
        if len(instance.disks) == 1:
9282
          raise errors.OpPrereqError("Cannot remove the last disk of"
9283
                                     " an instance", errors.ECODE_INVAL)
9284
        _CheckInstanceDown(self, instance, "cannot remove disks")
9285

    
9286
      if (disk_op == constants.DDM_ADD and
9287
          len(instance.disks) >= constants.MAX_DISKS):
9288
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9289
                                   " add more" % constants.MAX_DISKS,
9290
                                   errors.ECODE_STATE)
9291
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9292
        # an existing disk
9293
        if disk_op < 0 or disk_op >= len(instance.disks):
9294
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9295
                                     " are 0 to %d" %
9296
                                     (disk_op, len(instance.disks)),
9297
                                     errors.ECODE_INVAL)
9298

    
9299
    return
9300

    
9301
  def _ConvertPlainToDrbd(self, feedback_fn):
9302
    """Converts an instance from plain to drbd.
9303

9304
    """
9305
    feedback_fn("Converting template to drbd")
9306
    instance = self.instance
9307
    pnode = instance.primary_node
9308
    snode = self.op.remote_node
9309

    
9310
    # create a fake disk info for _GenerateDiskTemplate
9311
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9312
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9313
                                      instance.name, pnode, [snode],
9314
                                      disk_info, None, None, 0, feedback_fn)
9315
    info = _GetInstanceInfoText(instance)
9316
    feedback_fn("Creating aditional volumes...")
9317
    # first, create the missing data and meta devices
9318
    for disk in new_disks:
9319
      # unfortunately this is... not too nice
9320
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9321
                            info, True)
9322
      for child in disk.children:
9323
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9324
    # at this stage, all new LVs have been created, we can rename the
9325
    # old ones
9326
    feedback_fn("Renaming original volumes...")
9327
    rename_list = [(o, n.children[0].logical_id)
9328
                   for (o, n) in zip(instance.disks, new_disks)]
9329
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9330
    result.Raise("Failed to rename original LVs")
9331

    
9332
    feedback_fn("Initializing DRBD devices...")
9333
    # all child devices are in place, we can now create the DRBD devices
9334
    for disk in new_disks:
9335
      for node in [pnode, snode]:
9336
        f_create = node == pnode
9337
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9338

    
9339
    # at this point, the instance has been modified
9340
    instance.disk_template = constants.DT_DRBD8
9341
    instance.disks = new_disks
9342
    self.cfg.Update(instance, feedback_fn)
9343

    
9344
    # disks are created, waiting for sync
9345
    disk_abort = not _WaitForSync(self, instance)
9346
    if disk_abort:
9347
      raise errors.OpExecError("There are some degraded disks for"
9348
                               " this instance, please cleanup manually")
9349

    
9350
  def _ConvertDrbdToPlain(self, feedback_fn):
9351
    """Converts an instance from drbd to plain.
9352

9353
    """
9354
    instance = self.instance
9355
    assert len(instance.secondary_nodes) == 1
9356
    pnode = instance.primary_node
9357
    snode = instance.secondary_nodes[0]
9358
    feedback_fn("Converting template to plain")
9359

    
9360
    old_disks = instance.disks
9361
    new_disks = [d.children[0] for d in old_disks]
9362

    
9363
    # copy over size and mode
9364
    for parent, child in zip(old_disks, new_disks):
9365
      child.size = parent.size
9366
      child.mode = parent.mode
9367

    
9368
    # update instance structure
9369
    instance.disks = new_disks
9370
    instance.disk_template = constants.DT_PLAIN
9371
    self.cfg.Update(instance, feedback_fn)
9372

    
9373
    feedback_fn("Removing volumes on the secondary node...")
9374
    for disk in old_disks:
9375
      self.cfg.SetDiskID(disk, snode)
9376
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9377
      if msg:
9378
        self.LogWarning("Could not remove block device %s on node %s,"
9379
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9380

    
9381
    feedback_fn("Removing unneeded volumes on the primary node...")
9382
    for idx, disk in enumerate(old_disks):
9383
      meta = disk.children[1]
9384
      self.cfg.SetDiskID(meta, pnode)
9385
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9386
      if msg:
9387
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9388
                        " continuing anyway: %s", idx, pnode, msg)
9389

    
9390
  def Exec(self, feedback_fn):
9391
    """Modifies an instance.
9392

9393
    All parameters take effect only at the next restart of the instance.
9394

9395
    """
9396
    # Process here the warnings from CheckPrereq, as we don't have a
9397
    # feedback_fn there.
9398
    for warn in self.warn:
9399
      feedback_fn("WARNING: %s" % warn)
9400

    
9401
    result = []
9402
    instance = self.instance
9403
    # disk changes
9404
    for disk_op, disk_dict in self.op.disks:
9405
      if disk_op == constants.DDM_REMOVE:
9406
        # remove the last disk
9407
        device = instance.disks.pop()
9408
        device_idx = len(instance.disks)
9409
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9410
          self.cfg.SetDiskID(disk, node)
9411
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9412
          if msg:
9413
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9414
                            " continuing anyway", device_idx, node, msg)
9415
        result.append(("disk/%d" % device_idx, "remove"))
9416
      elif disk_op == constants.DDM_ADD:
9417
        # add a new disk
9418
        if instance.disk_template == constants.DT_FILE:
9419
          file_driver, file_path = instance.disks[0].logical_id
9420
          file_path = os.path.dirname(file_path)
9421
        else:
9422
          file_driver = file_path = None
9423
        disk_idx_base = len(instance.disks)
9424
        new_disk = _GenerateDiskTemplate(self,
9425
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
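  # Dispatch note: Exec() above looks up the pair
  #   mode = (instance.disk_template, self.op.disk_template)
  # in this table, so a plain -> drbd8 request runs _ConvertPlainToDrbd and
  # the reverse runs _ConvertDrbdToPlain; only the pairs listed here can be
  # dispatched.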


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
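    # Illustrative result shape (hypothetical names): a node that answered the
    # export-list RPC maps to its list of exports, while a failed node maps to
    # False, e.g.
    #   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}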
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

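    # Only remote exports need preparation data (handshake, HMAC-signed key
    # name and signed CA); for a local export there is nothing to prepare.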
    return None


class LUBackupExport(LogicalUnit):
9616
  """Export an instance to an image in the cluster.
9617

9618
  """
9619
  HPATH = "instance-export"
9620
  HTYPE = constants.HTYPE_INSTANCE
9621
  REQ_BGL = False
9622

    
9623
  def CheckArguments(self):
9624
    """Check the arguments.
9625

9626
    """
9627
    self.x509_key_name = self.op.x509_key_name
9628
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9629

    
9630
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9631
      if not self.x509_key_name:
9632
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9633
                                   errors.ECODE_INVAL)
9634

    
9635
      if not self.dest_x509_ca_pem:
9636
        raise errors.OpPrereqError("Missing destination X509 CA",
9637
                                   errors.ECODE_INVAL)
9638

    
9639
  def ExpandNames(self):
9640
    self._ExpandAndLockInstance()
9641

    
9642
    # Lock all nodes for local exports
9643
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9644
      # FIXME: lock only instance primary and destination node
9645
      #
9646
      # Sad but true, for now we have to lock all nodes, as we don't know where
9647
      # the previous export might be, and in this LU we search for it and
9648
      # remove it from its current node. In the future we could fix this by:
9649
      #  - making a tasklet to search (share-lock all), then create the
9650
      #    new one, then one to remove, after
9651
      #  - removing the removal operation altogether
9652
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9653

    
9654
  def DeclareLocks(self, level):
9655
    """Last minute lock declaration."""
9656
    # All nodes are locked anyway, so nothing to do here.
9657

    
9658
  def BuildHooksEnv(self):
9659
    """Build hooks env.
9660

9661
    This will run on the master, primary node and target node.
9662

9663
    """
9664
    env = {
9665
      "EXPORT_MODE": self.op.mode,
9666
      "EXPORT_NODE": self.op.target_node,
9667
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9668
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9669
      # TODO: Generic function for boolean env variables
9670
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9671
      }
9672

    
9673
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9674

    
9675
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9676

    
9677
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9678
      nl.append(self.op.target_node)
9679

    
9680
    return env, nl, nl
9681

    
9682
  def CheckPrereq(self):
9683
    """Check prerequisites.
9684

9685
    This checks that the instance and node names are valid.
9686

9687
    """
9688
    instance_name = self.op.instance_name
9689

    
9690
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9691
    assert self.instance is not None, \
9692
          "Cannot retrieve locked instance %s" % self.op.instance_name
9693
    _CheckNodeOnline(self, self.instance.primary_node)
9694

    
9695
    if (self.op.remove_instance and self.instance.admin_up and
9696
        not self.op.shutdown):
9697
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")
9699

    
9700
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9701
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9702
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9703
      assert self.dst_node is not None
9704

    
9705
      _CheckNodeOnline(self, self.dst_node.name)
9706
      _CheckNodeNotDrained(self, self.dst_node.name)
9707

    
9708
      self._cds = None
9709
      self.dest_disk_info = None
9710
      self.dest_x509_ca = None
9711

    
9712
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9713
      self.dst_node = None
9714

    
9715
      if len(self.op.target_node) != len(self.instance.disks):
9716
        raise errors.OpPrereqError(("Received destination information for %s"
9717
                                    " disks, but instance %s has %s disks") %
9718
                                   (len(self.op.target_node), instance_name,
9719
                                    len(self.instance.disks)),
9720
                                   errors.ECODE_INVAL)
9721

    
9722
      cds = _GetClusterDomainSecret()
9723

    
9724
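      # The key name tuple and signed CA checked below are, presumably, the
      # values handed back by LUBackupPrepare earlier in the remote-export
      # flow (its "x509_key_name" and "x509_ca" entries); both are re-verified
      # here against the cluster domain secret.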
      # Check X509 key name
9725
      try:
9726
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9727
      except (TypeError, ValueError), err:
9728
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9729

    
9730
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9731
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9732
                                   errors.ECODE_INVAL)
9733

    
9734
      # Load and verify CA
9735
      try:
9736
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9737
      except OpenSSL.crypto.Error, err:
9738
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9739
                                   (err, ), errors.ECODE_INVAL)
9740

    
9741
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9742
      if errcode is not None:
9743
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9744
                                   (msg, ), errors.ECODE_INVAL)
9745

    
9746
      self.dest_x509_ca = cert
9747

    
9748
      # Verify target information
9749
      disk_info = []
9750
      for idx, disk_data in enumerate(self.op.target_node):
9751
        try:
9752
          (host, port, magic) = \
9753
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9754
        except errors.GenericError, err:
9755
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9756
                                     (idx, err), errors.ECODE_INVAL)
9757

    
9758
        disk_info.append((host, port, magic))
9759

    
9760
      assert len(disk_info) == len(self.op.target_node)
9761
      self.dest_disk_info = disk_info
9762

    
9763
    else:
9764
      raise errors.ProgrammerError("Unhandled export mode %r" %
9765
                                   self.op.mode)
9766

    
9767
    # instance disk type verification
9768
    # TODO: Implement export support for file-based disks
9769
    for disk in self.instance.disks:
9770
      if disk.dev_type == constants.LD_FILE:
9771
        raise errors.OpPrereqError("Export not supported for instances with"
9772
                                   " file-based disks", errors.ECODE_INVAL)
9773

    
9774
  def _CleanupExports(self, feedback_fn):
9775
    """Removes exports of current instance from all other nodes.
9776

9777
    If an instance in a cluster with nodes A..D was exported to node C, its
9778
    exports will be removed from the nodes A, B and D.
9779

9780
    """
9781
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9782

    
9783
    nodelist = self.cfg.GetNodeList()
9784
    nodelist.remove(self.dst_node.name)
9785

    
9786
    # on one-node clusters nodelist will be empty after the removal
9787
    # if we proceed the backup would be removed because OpBackupQuery
9788
    # substitutes an empty list with the full cluster node list.
9789
    iname = self.instance.name
9790
    if nodelist:
9791
      feedback_fn("Removing old exports for instance %s" % iname)
9792
      exportlist = self.rpc.call_export_list(nodelist)
9793
      for node in exportlist:
9794
        if exportlist[node].fail_msg:
9795
          continue
9796
        if iname in exportlist[node].payload:
9797
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9798
          if msg:
9799
            self.LogWarning("Could not remove older export for instance %s"
9800
                            " on node %s: %s", iname, node, msg)
9801

    
9802
  def Exec(self, feedback_fn):
9803
    """Export an instance to an image in the cluster.
9804

9805
    """
9806
    assert self.op.mode in constants.EXPORT_MODES
9807

    
9808
    instance = self.instance
9809
    src_node = instance.primary_node
9810

    
9811
    if self.op.shutdown:
9812
      # shutdown the instance, but not the disks
9813
      feedback_fn("Shutting down instance %s" % instance.name)
9814
      result = self.rpc.call_instance_shutdown(src_node, instance,
9815
                                               self.op.shutdown_timeout)
9816
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9817
      result.Raise("Could not shutdown instance %s on"
9818
                   " node %s" % (instance.name, src_node))
9819

    
9820
    # set the disks ID correctly since call_instance_start needs the
9821
    # correct drbd minor to create the symlinks
9822
    for disk in instance.disks:
9823
      self.cfg.SetDiskID(disk, src_node)
9824

    
9825
    activate_disks = (not instance.admin_up)
9826

    
9827
    if activate_disks:
9828
      # Activate the instance disks if we're exporting a stopped instance
9829
      feedback_fn("Activating disks for %s" % instance.name)
9830
      _StartInstanceDisks(self, instance, None)
9831

    
9832
    try:
9833
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9834
                                                     instance)
9835

    
9836
      helper.CreateSnapshots()
9837
      try:
9838
        if (self.op.shutdown and instance.admin_up and
9839
            not self.op.remove_instance):
9840
          assert not activate_disks
9841
          feedback_fn("Starting instance %s" % instance.name)
9842
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9843
          msg = result.fail_msg
9844
          if msg:
9845
            feedback_fn("Failed to start instance: %s" % msg)
9846
            _ShutdownInstanceDisks(self, instance)
9847
            raise errors.OpExecError("Could not start instance: %s" % msg)
9848

    
9849
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9850
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9851
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9852
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9853
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9854

    
9855
          (key_name, _, _) = self.x509_key_name
9856

    
9857
          dest_ca_pem = \
9858
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9859
                                            self.dest_x509_ca)
9860

    
9861
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9862
                                                     key_name, dest_ca_pem,
9863
                                                     timeouts)
9864
      finally:
9865
        helper.Cleanup()
9866

    
9867
      # Check for backwards compatibility
9868
      assert len(dresults) == len(instance.disks)
9869
      assert compat.all(isinstance(i, bool) for i in dresults), \
9870
             "Not all results are boolean: %r" % dresults
9871

    
9872
    finally:
9873
      if activate_disks:
9874
        feedback_fn("Deactivating disks for %s" % instance.name)
9875
        _ShutdownInstanceDisks(self, instance)
9876

    
9877
    if not (compat.all(dresults) and fin_resu):
9878
      failures = []
9879
      if not fin_resu:
9880
        failures.append("export finalization")
9881
      if not compat.all(dresults):
9882
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9883
                               if not dsk)
9884
        failures.append("disk export: disk(s) %s" % fdsk)
9885

    
9886
      raise errors.OpExecError("Export failed, errors in %s" %
9887
                               utils.CommaJoin(failures))
9888

    
9889
    # At this point, the export was successful, we can cleanup/finish
9890

    
9891
    # Remove instance if requested
9892
    if self.op.remove_instance:
9893
      feedback_fn("Removing instance %s" % instance.name)
9894
      _RemoveInstance(self, feedback_fn, instance,
9895
                      self.op.ignore_remove_failures)
9896

    
9897
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9898
      self._CleanupExports(feedback_fn)
9899

    
9900
    return fin_resu, dresults
9901

    
9902

    
9903
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
10010
  """Logical unit for assigning nodes to groups.
10011

10012
  """
10013
  REQ_BGL = False
10014

    
10015
  def ExpandNames(self):
10016
    # These raise errors.OpPrereqError on their own:
10017
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10018
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10019

    
10020
    # We want to lock all the affected nodes and groups. We have readily
10021
    # available the list of nodes, and the *destination* group. To gather the
10022
    # list of "source" groups, we need to fetch node information.
10023
    self.node_data = self.cfg.GetAllNodesInfo()
10024
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10025
    affected_groups.add(self.group_uuid)
10026

    
10027
    self.needed_locks = {
10028
      locking.LEVEL_NODEGROUP: list(affected_groups),
10029
      locking.LEVEL_NODE: self.op.nodes,
10030
      }
10031

    
10032
  def CheckPrereq(self):
10033
    """Check prerequisites.
10034

10035
    """
10036
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10037
    instance_data = self.cfg.GetAllInstancesInfo()
10038

    
10039
    if self.group is None:
10040
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10041
                               (self.op.group_name, self.group_uuid))
10042

    
10043
    (new_splits, previous_splits) = \
10044
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10045
                                             for node in self.op.nodes],
10046
                                            self.node_data, instance_data)
10047

    
10048
    if new_splits:
10049
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10050

    
10051
      if not self.op.force:
10052
        raise errors.OpExecError("The following instances get split by this"
10053
                                 " change and --force was not given: %s" %
10054
                                 fmt_new_splits)
10055
      else:
10056
        self.LogWarning("This operation will split the following instances: %s",
10057
                        fmt_new_splits)
10058

    
10059
        if previous_splits:
10060
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
10062
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
10063

    
10064
  def Exec(self, feedback_fn):
10065
    """Assign nodes to a new group.
10066

10067
    """
10068
    for node in self.op.nodes:
10069
      self.node_data[node].group = self.group_uuid
10070

    
10071
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10072

    
10073
  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and become split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
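    # Worked example (hypothetical names): with node A in group g1 and node B
    # in group g2, a DRBD instance on [A, B] is already split; passing
    # changes=[("B", "g1")] would heal it (so it appears in neither returned
    # list), while changes=[("A", "g2")] for an instance on [A, A2] (both in
    # g1) would report that instance as newly split.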
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


class _GroupQuery(_QueryBase):
10124

    
10125
  FIELDS = query.GROUP_FIELDS
10126

    
10127
  def ExpandNames(self, lu):
10128
    lu.needed_locks = {}
10129

    
10130
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10131
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10132

    
10133
    if not self.names:
10134
      self.wanted = [name_to_uuid[name]
10135
                     for name in utils.NiceSort(name_to_uuid.keys())]
10136
    else:
10137
      # Accept names to be either names or UUIDs.
10138
      missing = []
10139
      self.wanted = []
10140
      all_uuid = frozenset(self._all_groups.keys())
10141

    
10142
      for name in self.names:
10143
        if name in all_uuid:
10144
          self.wanted.append(name)
10145
        elif name in name_to_uuid:
10146
          self.wanted.append(name_to_uuid[name])
10147
        else:
10148
          missing.append(name)
10149

    
10150
      if missing:
10151
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10152
                                   errors.ECODE_NOENT)
10153

    
10154
  def DeclareLocks(self, lu, level):
10155
    pass
10156

    
10157
  def _GetQueryData(self, lu):
10158
    """Computes the list of node groups and their attributes.
10159

10160
    """
10161
    do_nodes = query.GQ_NODE in self.requested_data
10162
    do_instances = query.GQ_INST in self.requested_data
10163

    
10164
    group_to_nodes = None
10165
    group_to_instances = None
10166

    
10167
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10168
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10169
    # latter GetAllInstancesInfo() is not enough, for we have to go through
10170
    # instance->node. Hence, we will need to process nodes even if we only need
10171
    # instance information.
10172
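    # Illustrative shape (hypothetical names): group_to_nodes ends up as
    #   {"<group-uuid>": ["node1", "node2"], ...}
    # and group_to_instances as {"<group-uuid>": ["inst1"], ...}, keyed by the
    # group UUIDs in self.wanted.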
    if do_nodes or do_instances:
10173
      all_nodes = lu.cfg.GetAllNodesInfo()
10174
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10175
      node_to_group = {}
10176

    
10177
      for node in all_nodes.values():
10178
        if node.group in group_to_nodes:
10179
          group_to_nodes[node.group].append(node.name)
10180
          node_to_group[node.name] = node.group
10181

    
10182
      if do_instances:
10183
        all_instances = lu.cfg.GetAllInstancesInfo()
10184
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
10185

    
10186
        for instance in all_instances.values():
10187
          node = instance.primary_node
10188
          if node in node_to_group:
10189
            group_to_instances[node_to_group[node]].append(instance.name)
10190

    
10191
        if not do_nodes:
10192
          # Do not pass on node information if it was not requested.
10193
          group_to_nodes = None
10194

    
10195
    return query.GroupQueryData([self._all_groups[uuid]
10196
                                 for uuid in self.wanted],
10197
                                group_to_nodes, group_to_instances)
10198

    
10199

    
10200
class LUGroupQuery(NoHooksLU):
10201
  """Logical unit for querying node groups.
10202

10203
  """
10204
  REQ_BGL = False
10205

    
10206
  def CheckArguments(self):
10207
    self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10208

    
10209
  def ExpandNames(self):
10210
    self.gq.ExpandNames(self)
10211

    
10212
  def Exec(self, feedback_fn):
10213
    return self.gq.OldStyleQuery(self)
10214

    
10215

    
10216
class LUGroupSetParams(LogicalUnit):
10217
  """Modifies the parameters of a node group.
10218

10219
  """
10220
  HPATH = "group-modify"
10221
  HTYPE = constants.HTYPE_GROUP
10222
  REQ_BGL = False
10223

    
10224
  def CheckArguments(self):
10225
    all_changes = [
10226
      self.op.ndparams,
10227
      self.op.alloc_policy,
10228
      ]
10229

    
10230
    if all_changes.count(None) == len(all_changes):
10231
      raise errors.OpPrereqError("Please pass at least one modification",
10232
                                 errors.ECODE_INVAL)
10233

    
10234
  def ExpandNames(self):
10235
    # This raises errors.OpPrereqError on its own:
10236
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10237

    
10238
    self.needed_locks = {
10239
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10240
      }
10241

    
10242
  def CheckPrereq(self):
10243
    """Check prerequisites.
10244

10245
    """
10246
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10247

    
10248
    if self.group is None:
10249
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10250
                               (self.op.group_name, self.group_uuid))
10251

    
10252
    if self.op.ndparams:
10253
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10254
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10255
      self.new_ndparams = new_ndparams
10256

    
10257
  def BuildHooksEnv(self):
10258
    """Build hooks env.
10259

10260
    """
10261
    env = {
10262
      "GROUP_NAME": self.op.group_name,
10263
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
10264
      }
10265
    mn = self.cfg.GetMasterNode()
10266
    return env, [mn], [mn]
10267

    
10268
  def Exec(self, feedback_fn):
10269
    """Modifies the node group.
10270

10271
    """
10272
    result = []
10273

    
10274
    if self.op.ndparams:
10275
      self.group.ndparams = self.new_ndparams
10276
      result.append(("ndparams", str(self.group.ndparams)))
10277

    
10278
    if self.op.alloc_policy:
10279
      self.group.alloc_policy = self.op.alloc_policy
10280

    
10281
    self.cfg.Update(self.group, feedback_fn)
10282
    return result
10283

    
10284

    
10285

    
10286
class LUGroupRemove(LogicalUnit):
10287
  HPATH = "group-remove"
10288
  HTYPE = constants.HTYPE_GROUP
10289
  REQ_BGL = False
10290

    
10291
  def ExpandNames(self):
10292
    # This will raise errors.OpPrereqError on its own:
10293
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10294
    self.needed_locks = {
10295
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10296
      }
10297

    
10298
  def CheckPrereq(self):
10299
    """Check prerequisites.
10300

10301
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
10304

10305
    """
10306
    # Verify that the group is empty.
10307
    group_nodes = [node.name
10308
                   for node in self.cfg.GetAllNodesInfo().values()
10309
                   if node.group == self.group_uuid]
10310

    
10311
    if group_nodes:
10312
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
10313
                                 " nodes: %s" %
10314
                                 (self.op.group_name,
10315
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
10316
                                 errors.ECODE_STATE)
10317

    
10318
    # Verify the cluster would not be left group-less.
10319
    if len(self.cfg.GetNodeGroupList()) == 1:
10320
      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10321
                                 " which cannot be left without at least one"
10322
                                 " group" % self.op.group_name,
10323
                                 errors.ECODE_STATE)
10324

    
10325
  def BuildHooksEnv(self):
10326
    """Build hooks env.
10327

10328
    """
10329
    env = {
10330
      "GROUP_NAME": self.op.group_name,
10331
      }
10332
    mn = self.cfg.GetMasterNode()
10333
    return env, [mn], [mn]
10334

    
10335
  def Exec(self, feedback_fn):
10336
    """Remove the node group.
10337

10338
    """
10339
    try:
10340
      self.cfg.RemoveNodeGroup(self.group_uuid)
10341
    except errors.ConfigurationError:
10342
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10343
                               (self.op.group_name, self.group_uuid))
10344

    
10345
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10346

    
10347

    
10348
class LUGroupRename(LogicalUnit):
10349
  HPATH = "group-rename"
10350
  HTYPE = constants.HTYPE_GROUP
10351
  REQ_BGL = False
10352

    
10353
  def ExpandNames(self):
10354
    # This raises errors.OpPrereqError on its own:
10355
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10356

    
10357
    self.needed_locks = {
10358
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10359
      }
10360

    
10361
  def CheckPrereq(self):
10362
    """Check prerequisites.
10363

10364
    This checks that the given old_name exists as a node group, and that
10365
    new_name doesn't.
10366

10367
    """
10368
    try:
10369
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10370
    except errors.OpPrereqError:
10371
      pass
10372
    else:
10373
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10374
                                 " node group (UUID: %s)" %
10375
                                 (self.op.new_name, new_name_uuid),
10376
                                 errors.ECODE_EXISTS)
10377

    
10378
  def BuildHooksEnv(self):
10379
    """Build hooks env.
10380

10381
    """
10382
    env = {
10383
      "OLD_NAME": self.op.old_name,
10384
      "NEW_NAME": self.op.new_name,
10385
      }
10386

    
10387
    mn = self.cfg.GetMasterNode()
10388
    all_nodes = self.cfg.GetAllNodesInfo()
10389
    run_nodes = [mn]
10390
    all_nodes.pop(mn, None)
10391

    
10392
    for node in all_nodes.values():
10393
      if node.group == self.group_uuid:
10394
        run_nodes.append(node.name)
10395

    
10396
    return env, run_nodes, run_nodes
10397

    
10398
  def Exec(self, feedback_fn):
10399
    """Rename the node group.
10400

10401
    """
10402
    group = self.cfg.GetNodeGroup(self.group_uuid)
10403

    
10404
    if group is None:
10405
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10406
                               (self.op.old_name, self.group_uuid))
10407

    
10408
    group.name = self.op.new_name
10409
    self.cfg.Update(group, feedback_fn)
10410

    
10411
    return self.op.new_name
10412

    
10413

    
10414
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10415
  """Generic tags LU.
10416

10417
  This is an abstract class which is the parent of all the other tags LUs.
10418

10419
  """
10420

    
10421
  def ExpandNames(self):
10422
    self.needed_locks = {}
10423
    if self.op.kind == constants.TAG_NODE:
10424
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10425
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10426
    elif self.op.kind == constants.TAG_INSTANCE:
10427
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10428
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10429

    
10430
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10431
    # not possible to acquire the BGL based on opcode parameters)
10432

    
10433
  def CheckPrereq(self):
10434
    """Check prerequisites.
10435

10436
    """
10437
    if self.op.kind == constants.TAG_CLUSTER:
10438
      self.target = self.cfg.GetClusterInfo()
10439
    elif self.op.kind == constants.TAG_NODE:
10440
      self.target = self.cfg.GetNodeInfo(self.op.name)
10441
    elif self.op.kind == constants.TAG_INSTANCE:
10442
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10443
    else:
10444
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10445
                                 str(self.op.kind), errors.ECODE_INVAL)
10446

    
10447

    
10448
class LUTagsGet(TagsLU):
10449
  """Returns the tags of a given object.
10450

10451
  """
10452
  REQ_BGL = False
10453

    
10454
  def ExpandNames(self):
10455
    TagsLU.ExpandNames(self)
10456

    
10457
    # Share locks as this is only a read operation
10458
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10459

    
10460
  def Exec(self, feedback_fn):
10461
    """Returns the tag list.
10462

10463
    """
10464
    return list(self.target.GetTags())
10465

    
10466

    
10467
class LUTagsSearch(NoHooksLU):
10468
  """Searches the tags for a given pattern.
10469

10470
  """
10471
  REQ_BGL = False
10472

    
10473
  def ExpandNames(self):
10474
    self.needed_locks = {}
10475

    
10476
  def CheckPrereq(self):
10477
    """Check prerequisites.
10478

10479
    This checks the pattern passed for validity by compiling it.
10480

10481
    """
10482
    try:
10483
      self.re = re.compile(self.op.pattern)
10484
    except re.error, err:
10485
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10486
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10487

    
10488
  def Exec(self, feedback_fn):
    """Returns the list of (path, tag) pairs matching the pattern.
10490

10491
    """
10492
    cfg = self.cfg
10493
    tgts = [("/cluster", cfg.GetClusterInfo())]
10494
    ilist = cfg.GetAllInstancesInfo().values()
10495
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10496
    nlist = cfg.GetAllNodesInfo().values()
10497
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10498
    results = []
10499
    for path, target in tgts:
10500
      for tag in target.GetTags():
10501
        if self.re.search(tag):
10502
          results.append((path, tag))
10503
    return results
10504

    
10505

    
10506
class LUTagsSet(TagsLU):
10507
  """Sets a tag on a given object.
10508

10509
  """
10510
  REQ_BGL = False
10511

    
10512
  def CheckPrereq(self):
10513
    """Check prerequisites.
10514

10515
    This checks the type and length of the tag name and value.
10516

10517
    """
10518
    TagsLU.CheckPrereq(self)
10519
    for tag in self.op.tags:
10520
      objects.TaggableObject.ValidateTag(tag)
10521

    
10522
  def Exec(self, feedback_fn):
10523
    """Sets the tag.
10524

10525
    """
10526
    try:
10527
      for tag in self.op.tags:
10528
        self.target.AddTag(tag)
10529
    except errors.TagError, err:
10530
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10531
    self.cfg.Update(self.target, feedback_fn)
10532

    
10533

    
10534
class LUTagsDel(TagsLU):
10535
  """Delete a list of tags from a given object.
10536

10537
  """
10538
  REQ_BGL = False
10539

    
10540
  def CheckPrereq(self):
10541
    """Check prerequisites.
10542

10543
    This checks that we have the given tag.
10544

10545
    """
10546
    TagsLU.CheckPrereq(self)
10547
    for tag in self.op.tags:
10548
      objects.TaggableObject.ValidateTag(tag)
10549
    del_tags = frozenset(self.op.tags)
10550
    cur_tags = self.target.GetTags()
10551

    
10552
    diff_tags = del_tags - cur_tags
10553
    if diff_tags:
10554
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10555
      raise errors.OpPrereqError("Tag(s) %s not found" %
10556
                                 (utils.CommaJoin(diff_names), ),
10557
                                 errors.ECODE_NOENT)
10558

    
10559
  def Exec(self, feedback_fn):
10560
    """Remove the tag from the object.
10561

10562
    """
10563
    for tag in self.op.tags:
10564
      self.target.RemoveTag(tag)
10565
    self.cfg.Update(self.target, feedback_fn)
10566

    
10567

    
10568
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
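    # As implemented below, repeat == 0 still performs a single delay, while
    # repeat == N runs the delay N times and logs the iteration count.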
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
10617
  """Utility LU to test some aspects of the job queue.
10618

10619
  """
10620
  REQ_BGL = False
10621

    
10622
  # Must be lower than default timeout for WaitForJobChange to see whether it
10623
  # notices changed jobs
10624
  _CLIENT_CONNECT_TIMEOUT = 20.0
10625
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10626
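  # Protocol sketch (as implemented in _NotifyUsingSocket below): the LU
  # creates a temporary Unix socket, hands its path to the client via a
  # callback, waits up to _CLIENT_CONNECT_TIMEOUT for the client to connect
  # and then up to _CLIENT_CONFIRM_TIMEOUT for a one-byte confirmation.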

    
10627
  @classmethod
10628
  def _NotifyUsingSocket(cls, cb, errcls):
10629
    """Opens a Unix socket and waits for another program to connect.
10630

10631
    @type cb: callable
10632
    @param cb: Callback to send socket name to client
10633
    @type errcls: class
10634
    @param errcls: Exception class to use for errors
10635

10636
    """
10637
    # Using a temporary directory as there's no easy way to create temporary
10638
    # sockets without writing a custom loop around tempfile.mktemp and
10639
    # socket.bind
10640
    tmpdir = tempfile.mkdtemp()
10641
    try:
10642
      tmpsock = utils.PathJoin(tmpdir, "sock")
10643

    
10644
      logging.debug("Creating temporary socket at %s", tmpsock)
10645
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10646
      try:
10647
        sock.bind(tmpsock)
10648
        sock.listen(1)
10649

    
10650
        # Send details to client
10651
        cb(tmpsock)
10652

    
10653
        # Wait for client to connect before continuing
10654
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10655
        try:
10656
          (conn, _) = sock.accept()
10657
        except socket.error, err:
10658
          raise errcls("Client didn't connect in time (%s)" % err)
10659
      finally:
10660
        sock.close()
10661
    finally:
10662
      # Remove as soon as client is connected
10663
      shutil.rmtree(tmpdir)
10664

    
10665
    # Wait for client to close
10666
    try:
10667
      try:
10668
        # pylint: disable-msg=E1101
10669
        # Instance of '_socketobject' has no ... member
10670
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10671
        conn.recv(1)
10672
      except socket.error, err:
10673
        raise errcls("Client failed to confirm notification (%s)" % err)
10674
    finally:
10675
      conn.close()
10676

    
10677
  def _SendNotification(self, test, arg, sockname):
10678
    """Sends a notification to the client.
10679

10680
    @type test: string
10681
    @param test: Test name
10682
    @param arg: Test argument (depends on test)
10683
    @type sockname: string
10684
    @param sockname: Socket path
10685

10686
    """
10687
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10688

    
10689
  def _Notify(self, prereq, test, arg):
10690
    """Notifies the client of a test.
10691

10692
    @type prereq: bool
10693
    @param prereq: Whether this is a prereq-phase test
10694
    @type test: string
10695
    @param test: Test name
10696
    @param arg: Test argument (depends on test)
10697

10698
    """
10699
    if prereq:
10700
      errcls = errors.OpPrereqError
10701
    else:
10702
      errcls = errors.OpExecError
10703

    
10704
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10705
                                                  test, arg),
10706
                                   errcls)
10707

    
10708
  def CheckArguments(self):
10709
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10710
    self.expandnames_calls = 0
10711

    
10712
  def ExpandNames(self):
10713
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10714
    if checkargs_calls < 1:
10715
      raise errors.ProgrammerError("CheckArguments was not called")
10716

    
10717
    self.expandnames_calls += 1
10718

    
10719
    if self.op.notify_waitlock:
10720
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10721

    
10722
    self.LogInfo("Expanding names")
10723

    
10724
    # Get lock on master node (just to get a lock, not for a particular reason)
10725
    self.needed_locks = {
10726
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10727
      }
10728

    
10729
  def Exec(self, feedback_fn):
10730
    if self.expandnames_calls < 1:
10731
      raise errors.ProgrammerError("ExpandNames was not called")
10732

    
10733
    if self.op.notify_exec:
10734
      self._Notify(False, constants.JQT_EXEC, None)
10735

    
10736
    self.LogInfo("Executing")
10737

    
10738
    if self.op.log_messages:
10739
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10740
      for idx, msg in enumerate(self.op.log_messages):
10741
        self.LogInfo("Sending log message %s", idx + 1)
10742
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10743
        # Report how many test messages have been sent
10744
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10745

    
10746
    if self.op.fail:
10747
      raise errors.OpExecError("Opcode failure was requested")
10748

    
10749
    return True
10750

    
10751

    
10752
class IAllocator(object):
10753
  """IAllocator framework.
10754

10755
  An IAllocator instance has several sets of attributes:
10756
    - cfg that is needed to query the cluster
10757
    - input data (all members of the _KEYS class attribute are required)
10758
    - four buffer attributes (in|out_data|text), that represent the
10759
      input (to the external script) in text and data structure format,
10760
      and the output from it, again in two formats
10761
    - the result variables from the script (success, info, nodes) for
10762
      easy usage
10763

10764
  """
10765
  # pylint: disable-msg=R0902
10766
  # lots of instance attributes
10767
  _ALLO_KEYS = [
10768
    "name", "mem_size", "disks", "disk_template",
10769
    "os", "tags", "nics", "vcpus", "hypervisor",
10770
    ]
10771
  _RELO_KEYS = [
10772
    "name", "relocate_from",
10773
    ]
10774
  _EVAC_KEYS = [
10775
    "evac_nodes",
10776
    ]
10777

    
10778
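  # Illustrative call only (all names and sizes below are made up): an
  # allocation request would be built roughly as
  #   IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
  #              name="inst1.example.com", mem_size=1024,
  #              disks=[{"size": 1024, "mode": "w"}],
  #              disk_template=constants.DT_DRBD8, os="debian-image",
  #              tags=[], nics=[{"mac": "auto"}], vcpus=1, hypervisor=None)
  # supplying every key in _ALLO_KEYS, as enforced by __init__ below.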
  def __init__(self, cfg, rpc, mode, **kwargs):
10779
    self.cfg = cfg
10780
    self.rpc = rpc
10781
    # init buffer variables
10782
    self.in_text = self.out_text = self.in_data = self.out_data = None
10783
    # init all input fields so that pylint is happy
10784
    self.mode = mode
10785
    self.mem_size = self.disks = self.disk_template = None
10786
    self.os = self.tags = self.nics = self.vcpus = None
10787
    self.hypervisor = None
10788
    self.relocate_from = None
10789
    self.name = None
10790
    self.evac_nodes = None
10791
    # computed fields
10792
    self.required_nodes = None
10793
    # init result fields
10794
    self.success = self.info = self.result = None
10795
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10796
      keyset = self._ALLO_KEYS
10797
      fn = self._AddNewInstance
10798
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10799
      keyset = self._RELO_KEYS
10800
      fn = self._AddRelocateInstance
10801
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10802
      keyset = self._EVAC_KEYS
10803
      fn = self._AddEvacuateNodes
10804
    else:
10805
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10806
                                   " IAllocator" % self.mode)
10807
    for key in kwargs:
10808
      if key not in keyset:
10809
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10810
                                     " IAllocator" % key)
10811
      setattr(self, key, kwargs[key])
10812

    
10813
    for key in keyset:
10814
      if key not in kwargs:
10815
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10816
                                     " IAllocator" % key)
10817
    self._BuildInputData(fn)
10818

    
10819
  def _ComputeClusterData(self):
10820
    """Compute the generic allocator input data.
10821

10822
    This is the data that is independent of the actual operation.
10823

10824
    """
10825
    cfg = self.cfg
10826
    cluster_info = cfg.GetClusterInfo()
10827
    # cluster data
10828
    data = {
10829
      "version": constants.IALLOCATOR_VERSION,
10830
      "cluster_name": cfg.GetClusterName(),
10831
      "cluster_tags": list(cluster_info.GetTags()),
10832
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10833
      # we don't have job IDs
10834
      }
10835
    ninfo = cfg.GetAllNodesInfo()
10836
    iinfo = cfg.GetAllInstancesInfo().values()
10837
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10838

    
10839
    # node data
10840
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
10841

    
10842
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10843
      hypervisor_name = self.hypervisor
10844
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10845
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10846
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10847
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10848

    
10849
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10850
                                        hypervisor_name)
10851
    node_iinfo = \
10852
      self.rpc.call_all_instances_info(node_list,
10853
                                       cluster_info.enabled_hypervisors)
10854

    
10855
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10856

    
10857
    config_ndata = self._ComputeBasicNodeData(ninfo)
10858
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10859
                                                 i_list, config_ndata)
10860
    assert len(data["nodes"]) == len(ninfo), \
10861
        "Incomplete node data computed"
10862

    
10863
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10864

    
10865
    self.in_data = data
10866

    
10867
  @staticmethod
10868
  def _ComputeNodeGroupData(cfg):
10869
    """Compute node groups data.
10870

10871
    """
10872
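    # Resulting shape: {"<group-uuid>": {"name": ..., "alloc_policy": ...}, ...}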
    ng = {}
10873
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10874
      ng[guuid] = {
10875
        "name": gdata.name,
10876
        "alloc_policy": gdata.alloc_policy,
10877
        }
10878
    return ng
10879

    
10880
  @staticmethod
10881
  def _ComputeBasicNodeData(node_cfg):
10882
    """Compute global node data.
10883

10884
    @rtype: dict
10885
    @returns: a dict of name: (node dict, node config)
10886

10887
    """
10888
    node_results = {}
10889
    for ninfo in node_cfg.values():
10890
      # fill in static (config-based) values
10891
      pnr = {
10892
        "tags": list(ninfo.GetTags()),
10893
        "primary_ip": ninfo.primary_ip,
10894
        "secondary_ip": ninfo.secondary_ip,
10895
        "offline": ninfo.offline,
10896
        "drained": ninfo.drained,
10897
        "master_candidate": ninfo.master_candidate,
10898
        "group": ninfo.group,
10899
        "master_capable": ninfo.master_capable,
10900
        "vm_capable": ninfo.vm_capable,
10901
        }
10902

    
10903
      node_results[ninfo.name] = pnr
10904

    
10905
    return node_results
10906

    
10907

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Extend the basic node data with dynamic (runtime) information.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # build the runtime (dynamic) node data
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
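
  # Worked example, not part of the original code, with made-up numbers: if a
  # primary instance has BE_MEMORY = 1024 MiB but the hypervisor reports it
  # currently using only 600 MiB, then i_mem_diff is 424 and the node's
  # reported memory_free above is reduced by 424 MiB, so "free_memory"
  # reflects the memory left once every primary instance grows to its
  # configured size.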

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
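
  # Illustrative sketch, not part of the original code: one entry of the
  # instance_data dict returned above, for a hypothetical DRBD instance,
  # would look roughly like
  #
  #   "web1.example.com": {
  #     "tags": [], "admin_up": True, "vcpus": 2, "memory": 512,
  #     "os": "debootstrap+default", "nodes": ["node1", "node2"],
  #     "nics": [{"mac": "aa:00:00:11:22:33", "ip": None, "mode": "bridged",
  #               "link": "xen-br0", "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd", "hypervisor": "xen-pvm",
  #     "disk_space_total": <as computed by _ComputeDiskSize>,
  #     }
  #
  # All names and values here are hypothetical.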

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
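
  # Illustrative sketch, not part of the original code: for a new DRBD-based
  # instance the request dict returned above might look roughly like
  #
  #   {"name": "web1.example.com", "disk_template": "drbd", "tags": [],
  #    "os": "debootstrap+default", "vcpus": 1, "memory": 256,
  #    "disks": [{"size": 1024, "mode": "w"}],
  #    "disk_space_total": <as computed by _ComputeDiskSize>,
  #    "nics": [...], "required_nodes": 2}
  #
  # All values here are hypothetical.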

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
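
  # Illustrative sketch, not part of the original code: the keys produced by
  # _ComputeClusterData are not shown here, but the serialized self.in_text
  # ends up roughly as
  #
  #   {..., "request": {"type": <one of the IALLOCATOR_MODE_* values>, ...}}
  #
  # where the "request" sub-dict is whatever _AddNewInstance,
  # _AddRelocateInstance or _AddEvacuateNodes returned.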

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This parses the allocator output and, if successful, saves the result
    in self.out_data and in the corresponding instance attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
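
  # Illustrative sketch, not part of the original code: the smallest payload
  # accepted by _ValidateResult above is a JSON object with the three
  # mandatory keys, e.g.
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node2.example.com"]}
  #
  # An older script returning "nodes" instead of "result" is still accepted
  # through the backwards-compatibility branch above.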


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
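
# Illustrative usage sketch, not part of the original code: callers resolve
# the class handling a query resource via this helper, e.g.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#
# and an unknown resource name raises OpPrereqError with ECODE_INVAL.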