
#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells whether a node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


# End types
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these checks separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # "could be a function" warning
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, names, fields, use_locking):
    """Initializes this class.

    """
    self.names = names
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields)
    self.requested_data = self.query.RequestedData()

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  @classmethod
  def FieldsQuery(cls, fields):
    """Returns list of available fields.

    @return: List of L{objects.QueryFieldDefinition}

    """
    return query.QueryFields(cls.FIELDS, fields)

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu))

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu))


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
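# Illustrative example (not part of the original module): with
# old_params={"a": 1, "b": 2} and update_dict={"a": constants.VALUE_DEFAULT,
# "c": 3}, _GetUpdatedParams returns {"b": 2, "c": 3} -- "a" is reset to its
# default by being dropped from the result, "c" is added, and "b" is kept
# unchanged.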


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
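# Illustrative example (not part of the original module): for an instance with
# one bridged NIC and a single disk, the dict built above would contain keys
# like INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT=1,
# INSTANCE_NIC0_MODE, INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT=1,
# INSTANCE_DISK0_SIZE and INSTANCE_DISK0_MODE, plus one INSTANCE_BE_* and
# INSTANCE_HV_* entry per backend/hypervisor parameter; the hooks runner
# later adds the GANETI_ prefix to each key.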


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
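# Illustrative note (not part of the original module): _VerifyCertificate
# returns an (error-type, message) pair -- (None, None) for a healthy
# certificate, (LUVerifyCluster.ETYPE_WARNING, "While verifying ...") when the
# certificate is close to expiring, and (LUVerifyCluster.ETYPE_ERROR, ...)
# when it is expired or cannot be loaded at all.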


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)
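    # Illustrative example (not part of the original module): with
    # error_codes enabled, _Error(self.ENODELVM, "node1.example.com",
    # "unable to check volume groups") reports
    #   "  - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups"
    # while the default human-readable form is
    #   "  - ERROR: node node1.example.com: unable to check volume groups".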
1329

    
1330
  def _ErrorIf(self, cond, *args, **kwargs):
1331
    """Log an error message if the passed condition is True.
1332

1333
    """
1334
    cond = bool(cond) or self.op.debug_simulate_errors
1335
    if cond:
1336
      self._Error(*args, **kwargs)
1337
    # do not mark the operation as failed for WARN cases only
1338
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339
      self.bad = self.bad or cond
1340

    
1341
  def _VerifyNode(self, ninfo, nresult):
1342
    """Perform some basic validation on data returned from a node.
1343

1344
      - check the result data structure is well formed and has all the
1345
        mandatory fields
1346
      - check ganeti version
1347

1348
    @type ninfo: L{objects.Node}
1349
    @param ninfo: the node to check
1350
    @param nresult: the results from the node
1351
    @rtype: boolean
1352
    @return: whether overall this call was successful (and we can expect
1353
         reasonable values in the respose)
1354

1355
    """
1356
    node = ninfo.name
1357
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358

    
1359
    # main result, nresult should be a non-empty dict
1360
    test = not nresult or not isinstance(nresult, dict)
1361
    _ErrorIf(test, self.ENODERPC, node,
1362
                  "unable to verify node: no data returned")
1363
    if test:
1364
      return False
1365

    
1366
    # compares ganeti version
1367
    local_version = constants.PROTOCOL_VERSION
1368
    remote_version = nresult.get("version", None)
1369
    test = not (remote_version and
1370
                isinstance(remote_version, (list, tuple)) and
1371
                len(remote_version) == 2)
1372
    _ErrorIf(test, self.ENODERPC, node,
1373
             "connection to node returned invalid data")
1374
    if test:
1375
      return False
1376

    
1377
    test = local_version != remote_version[0]
1378
    _ErrorIf(test, self.ENODEVERSION, node,
1379
             "incompatible protocol versions: master %s,"
1380
             " node %s", local_version, remote_version[0])
1381
    if test:
1382
      return False
1383

    
1384
    # node seems compatible, we can actually try to look into its results
1385

    
1386
    # full package version
1387
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388
                  self.ENODEVERSION, node,
1389
                  "software version mismatch: master %s, node %s",
1390
                  constants.RELEASE_VERSION, remote_version[1],
1391
                  code=self.ETYPE_WARNING)
1392

    
1393
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1395
      for hv_name, hv_result in hyp_result.iteritems():
1396
        test = hv_result is not None
1397
        _ErrorIf(test, self.ENODEHV, node,
1398
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399

    
1400
    test = nresult.get(constants.NV_NODESETUP,
1401
                           ["Missing NODESETUP results"])
1402
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1403
             "; ".join(test))
1404

    
1405
    return True
1406

    
1407
  def _VerifyNodeTime(self, ninfo, nresult,
1408
                      nvinfo_starttime, nvinfo_endtime):
1409
    """Check the node time.
1410

1411
    @type ninfo: L{objects.Node}
1412
    @param ninfo: the node to check
1413
    @param nresult: the remote results for the node
1414
    @param nvinfo_starttime: the start time of the RPC call
1415
    @param nvinfo_endtime: the end time of the RPC call
1416

1417
    """
1418
    node = ninfo.name
1419
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1420

    
1421
    ntime = nresult.get(constants.NV_TIME, None)
1422
    try:
1423
      ntime_merged = utils.MergeTime(ntime)
1424
    except (ValueError, TypeError):
1425
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1426
      return
1427

    
1428
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1429
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1430
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1431
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1432
    else:
1433
      ntime_diff = None
1434

    
1435
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1436
             "Node time diverges by at least %s from master node time",
1437
             ntime_diff)
1438

    
1439
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1440
    """Check the node time.
1441

1442
    @type ninfo: L{objects.Node}
1443
    @param ninfo: the node to check
1444
    @param nresult: the remote results for the node
1445
    @param vg_name: the configured VG name
1446

1447
    """
1448
    if vg_name is None:
1449
      return
1450

    
1451
    node = ninfo.name
1452
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1453

    
1454
    # checks vg existence and size > 20G
1455
    vglist = nresult.get(constants.NV_VGLIST, None)
1456
    test = not vglist
1457
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1458
    if not test:
1459
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1460
                                            constants.MIN_VG_SIZE)
1461
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1462

    
1463
    # check pv names
1464
    pvlist = nresult.get(constants.NV_PVLIST, None)
1465
    test = pvlist is None
1466
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1467
    if not test:
1468
      # check that ':' is not present in PV names, since it's a
1469
      # special character for lvcreate (denotes the range of PEs to
1470
      # use on the PV)
1471
      for _, pvname, owner_vg in pvlist:
1472
        test = ":" in pvname
1473
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1474
                 " '%s' of VG '%s'", pvname, owner_vg)
1475

    
1476
  def _VerifyNodeNetwork(self, ninfo, nresult):
1477
    """Check the node time.
1478

1479
    @type ninfo: L{objects.Node}
1480
    @param ninfo: the node to check
1481
    @param nresult: the remote results for the node
1482

1483
    """
1484
    node = ninfo.name
1485
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1486

    
1487
    test = constants.NV_NODELIST not in nresult
1488
    _ErrorIf(test, self.ENODESSH, node,
1489
             "node hasn't returned node ssh connectivity data")
1490
    if not test:
1491
      if nresult[constants.NV_NODELIST]:
1492
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1493
          _ErrorIf(True, self.ENODESSH, node,
1494
                   "ssh communication with node '%s': %s", a_node, a_msg)
1495

    
1496
    test = constants.NV_NODENETTEST not in nresult
1497
    _ErrorIf(test, self.ENODENET, node,
1498
             "node hasn't returned node tcp connectivity data")
1499
    if not test:
1500
      if nresult[constants.NV_NODENETTEST]:
1501
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1502
        for anode in nlist:
1503
          _ErrorIf(True, self.ENODENET, node,
1504
                   "tcp communication with node '%s': %s",
1505
                   anode, nresult[constants.NV_NODENETTEST][anode])
1506

    
1507
    test = constants.NV_MASTERIP not in nresult
1508
    _ErrorIf(test, self.ENODENET, node,
1509
             "node hasn't returned node master IP reachability data")
1510
    if not test:
1511
      if not nresult[constants.NV_MASTERIP]:
1512
        if node == self.master_node:
1513
          msg = "the master node cannot reach the master IP (not configured?)"
1514
        else:
1515
          msg = "cannot reach the master IP"
1516
        _ErrorIf(True, self.ENODENET, node, msg)
1517

    
1518
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if (not node == node_current):
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies and computes the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

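      # the zip below pairs each (instance, disk) entry with its status and
      # flattens the result into the {instance: {node: [(success, payload)]}}
      # structure documented above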
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run only in the post phase; their failure is logged
    in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }
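    # Each NV_* key above requests one class of checks from the remote nodes;
    # the verification results come back keyed by the same NV_* constants and
    # are consumed by the _Verify*/_Update* helpers above.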

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    # Gather OOB paths
    oob_paths = []
    for node in nodeinfo:
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

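      # nimg.sbp maps each primary node to the instances that use the current
      # node as secondary; _VerifyNPlusOneMemory iterates over this mapping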
      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_NET_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(nodeinfo_byname[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # manually override lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    vg_names = self.rpc.call_vg_list(nodes)
    vg_names.Raise("Cannot get list of VGs")

    for node in nodes:
      # node_volume
      node_res = self.rpc.call_lv_list([node],
                                       vg_names[node].payload.keys())[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
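        # the shift by 20 converts the byte count reported by the node into
        # MiB, the unit in which disk.size is recorded in the configuration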
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2842
      # either the enabled list has changed, or the parameters have, validate
2843
      for hv_name, hv_params in self.new_hvparams.items():
2844
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2845
            (self.op.enabled_hypervisors and
2846
             hv_name in self.op.enabled_hypervisors)):
2847
          # either this is a new hypervisor, or its parameters have changed
2848
          hv_class = hypervisor.GetHypervisor(hv_name)
2849
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2850
          hv_class.CheckParameterSyntax(hv_params)
2851
          _CheckHVParams(self, node_list, hv_name, hv_params)
2852

    
2853
    if self.op.os_hvp:
2854
      # no need to check any newly-enabled hypervisors, since the
2855
      # defaults have already been checked in the above code-block
2856
      for os_name, os_hvp in self.new_os_hvp.items():
2857
        for hv_name, hv_params in os_hvp.items():
2858
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2859
          # we need to fill in the new os_hvp on top of the actual hv_p
2860
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2861
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2862
          hv_class = hypervisor.GetHypervisor(hv_name)
2863
          hv_class.CheckParameterSyntax(new_osp)
2864
          _CheckHVParams(self, node_list, hv_name, new_osp)
2865

    
2866
    if self.op.default_iallocator:
2867
      alloc_script = utils.FindFile(self.op.default_iallocator,
2868
                                    constants.IALLOCATOR_SEARCH_PATH,
2869
                                    os.path.isfile)
2870
      if alloc_script is None:
2871
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2872
                                   " specified" % self.op.default_iallocator,
2873
                                   errors.ECODE_INVAL)
2874

    
2875
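  # Note on the parameter layering done in CheckPrereq above: objects.FillDict
  # returns a copy of its first argument updated with the second, so OS- and
  # opcode-level overrides win over cluster defaults.  Illustrative sketch
  # (the values are made up):
  #   objects.FillDict({"root_path": "/dev/sda1", "acpi": True},
  #                    {"root_path": "/dev/vda1"})
  #   => {"root_path": "/dev/vda1", "acpi": True}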
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


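# Note: _UploadHelper only warns on per-node upload failures and silently
# skips files that do not exist on the master (e.g. an absent RAPI users
# file), so missing optional files do not abort the distribution.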
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


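# LURedistributeConfig is a thin wrapper: rewriting the cluster config via
# cfg.Update() triggers the normal config/ssconf distribution, and the
# ancillary files are then pushed explicitly via _RedistributeAncillaryFiles
# (this LU typically backs the "gnt-cluster redist-conf" command).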
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


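# Illustrative use of _WaitForSync from an LU (a sketch only; `self` is the
# calling LogicalUnit and `instance` a locked objects.Instance):
#
#   disk_abort = not _WaitForSync(self, instance)
#   if disk_abort:
#     raise errors.OpExecError("There are some degraded disks for"
#                              " this instance")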
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


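# Illustrative call of _CheckDiskConsistency (a sketch; `dev` and `instance`
# come from the calling LU): checking only the local storage state of a disk
# on the instance's primary node would look like
#
#   _CheckDiskConsistency(self, dev, instance.primary_node, True, ldisk=True)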
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)

    if node is None:
      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)

    self.oob_program = _SupportsOob(self.cfg, node)

    if not self.oob_program:
      raise errors.OpPrereqError("OOB is not supported for node %s" %
                                 self.op.node_name)

    if self.op.command == constants.OOB_POWER_OFF and not node.offline:
      raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                  " not marked offline") % self.op.node_name)

    self.node = node

  def ExpandNames(self):
    """Gather locks we need.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.cfg.GetMasterNode()
    node = self.node

    logging.info("Executing out-of-band command '%s' using '%s' on %s",
                 self.op.command, self.oob_program, self.op.node_name)
    result = self.rpc.call_run_oob(master_node, self.oob_program,
                                   self.op.command, self.op.node_name,
                                   self.op.timeout)

    result.Raise("An error occurred on execution of OOB helper")

    self._CheckPayload(result)

    if self.op.command == constants.OOB_HEALTH:
      # For health we should log important events
      for item, status in result.payload:
        if status in [constants.OOB_STATUS_WARNING,
                      constants.OOB_STATUS_CRITICAL]:
          logging.warning("On node '%s' item '%s' has status '%s'",
                          self.op.node_name, item, status)

    if self.op.command == constants.OOB_POWER_ON:
      node.powered = True
    elif self.op.command == constants.OOB_POWER_OFF:
      node.powered = False
    elif self.op.command == constants.OOB_POWER_STATUS:
      powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
      if powered != self.node.powered:
        logging.warning(("Recorded power state (%s) of node '%s' does not match"
                         " actual power state (%s)"), node.powered,
                        self.op.node_name, powered)

    self.cfg.Update(node, feedback_fn)

    return result.payload

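  # _CheckPayload below enforces the expected payload shape per OOB command:
  # "health" must return a list of (item, status) pairs, "power-status" a
  # dict, and the power-on/power-off/power-cycle commands no payload at all.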
  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      for item, status in result.payload:
        if status not in constants.OOB_STATUSES:
          errs.append("health item '%s' has invalid status '%s'" %
                      (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

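  # _DiagnoseByOS expects each entry of a node's payload to be a tuple of
  # (name, path, status, diagnose, variants, parameters, api_versions), as
  # returned by the os_diagnose RPC used in Exec below.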
  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary.

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


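# _NodeQuery (and _InstanceQuery further below) implement the generic
# _QueryBase interface: the thin LU classes delegate to them through
# OldStyleQuery/NewStyleQuery, so locking and data gathering are kept here
# instead of being duplicated in each per-opcode class.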
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []

    # Gather data as requested
    if query.IQ_LIVE in self.requested_data:
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          live_data.update(result.payload)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)
    names = qlang.ReadSimpleFilter("name", self.op.filter)

    self.impl = qcls(names, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return self.qcls.FieldsQuery(self.op.fields)


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


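# LUAddNode covers both a first-time node addition and the re-add case
# (self.op.readd, e.g. "gnt-node add --readd"), where an already configured
# node is brought back and keeps its existing configuration entry.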
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

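  # The _F2R/_R2F tables above encode the flag combinations as a single role,
  # e.g. _F2R[(True, False, False)] == _ROLE_CANDIDATE and
  # _R2F[_ROLE_OFFLINE] == (False, False, True); only one of the flags may be
  # set at a time, which CheckArguments below enforces.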
  def CheckArguments(self):
4217
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4218
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4219
                self.op.master_capable, self.op.vm_capable,
4220
                self.op.secondary_ip, self.op.ndparams]
4221
    if all_mods.count(None) == len(all_mods):
4222
      raise errors.OpPrereqError("Please pass at least one modification",
4223
                                 errors.ECODE_INVAL)
4224
    if all_mods.count(True) > 1:
4225
      raise errors.OpPrereqError("Can't set the node into more than one"
4226
                                 " state at the same time",
4227
                                 errors.ECODE_INVAL)
4228

    
4229
    # Boolean value that tells us whether we might be demoting from MC
4230
    self.might_demote = (self.op.master_candidate == False or
4231
                         self.op.offline == True or
4232
                         self.op.drained == True or
4233
                         self.op.master_capable == False)
4234

    
4235
    if self.op.secondary_ip:
4236
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4237
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4238
                                   " address" % self.op.secondary_ip,
4239
                                   errors.ECODE_INVAL)
4240

    
4241
    self.lock_all = self.op.auto_promote and self.might_demote
4242
    self.lock_instances = self.op.secondary_ip is not None
4243

    
4244
  def ExpandNames(self):
4245
    if self.lock_all:
4246
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4247
    else:
4248
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4249

    
4250
    if self.lock_instances:
4251
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4252

    
4253
  def DeclareLocks(self, level):
4254
    # If we have locked all instances, before waiting to lock nodes, release
4255
    # all the ones living on nodes unrelated to the current operation.
4256
    if level == locking.LEVEL_NODE and self.lock_instances:
4257
      instances_release = []
4258
      instances_keep = []
4259
      self.affected_instances = []
4260
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4261
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4262
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4263
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4264
          if i_mirrored and self.op.node_name in instance.all_nodes:
4265
            instances_keep.append(instance_name)
4266
            self.affected_instances.append(instance)
4267
          else:
4268
            instances_release.append(instance_name)
4269
        if instances_release:
4270
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4271
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
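      # Net effect: only network-mirrored (e.g. DRBD) instances that actually
      # use this node keep their lock and are recorded in affected_instances;
      # all other instance locks acquired via ALL_SET are released early.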
4272

    
4273
  def BuildHooksEnv(self):
4274
    """Build hooks env.
4275

4276
    This runs on the master node.
4277

4278
    """
4279
    env = {
4280
      "OP_TARGET": self.op.node_name,
4281
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4282
      "OFFLINE": str(self.op.offline),
4283
      "DRAINED": str(self.op.drained),
4284
      "MASTER_CAPABLE": str(self.op.master_capable),
4285
      "VM_CAPABLE": str(self.op.vm_capable),
4286
      }
4287
    nl = [self.cfg.GetMasterNode(),
4288
          self.op.node_name]
4289
    return env, nl, nl
4290

    
4291
  def CheckPrereq(self):
4292
    """Check prerequisites.
4293

4294
    This only checks the instance list against the existing names.
4295

4296
    """
4297
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4298

    
4299
    if (self.op.master_candidate is not None or
4300
        self.op.drained is not None or
4301
        self.op.offline is not None):
4302
      # we can't change the master's node flags
4303
      if self.op.node_name == self.cfg.GetMasterNode():
4304
        raise errors.OpPrereqError("The master role can be changed"
4305
                                   " only via master-failover",
4306
                                   errors.ECODE_INVAL)
4307

    
4308
    if self.op.master_candidate and not node.master_capable:
4309
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4310
                                 " it a master candidate" % node.name,
4311
                                 errors.ECODE_STATE)
4312

    
4313
    if self.op.vm_capable == False:
4314
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4315
      if ipri or isec:
4316
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4317
                                   " the vm_capable flag" % node.name,
4318
                                   errors.ECODE_STATE)
4319

    
4320
    if node.master_candidate and self.might_demote and not self.lock_all:
4321
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
4322
      # check if after removing the current node, we're missing master
4323
      # candidates
4324
      (mc_remaining, mc_should, _) = \
4325
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4326
      if mc_remaining < mc_should:
4327
        raise errors.OpPrereqError("Not enough master candidates, please"
4328
                                   " pass auto_promote to allow promotion",
4329
                                   errors.ECODE_STATE)
4330

    
4331
    self.old_flags = old_flags = (node.master_candidate,
4332
                                  node.drained, node.offline)
4333
    assert old_flags in self._F2R, "Unhandled old flags %s" % str(old_flags)
4334
    self.old_role = old_role = self._F2R[old_flags]
4335

    
4336
    # Check for ineffective changes
4337
    for attr in self._FLAGS:
4338
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4339
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4340
        setattr(self.op, attr, None)
4341

    
4342
    # Past this point, any flag change to False means a transition
4343
    # away from the respective state, as only real changes are kept
4344

    
4345
    # TODO: We might query the real power state if it supports OOB
4346
    if _SupportsOob(self.cfg, node):
4347
      if self.op.offline is False and not (node.powered or
4348
                                           self.op.powered == True):
4349
        raise errors.OpPrereqError(("Please power on node %s first before you"
4350
                                    " can reset offline state") %
4351
                                   self.op.node_name)
4352
    elif self.op.powered is not None:
4353
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4354
                                  " which does not support out-of-band"
4355
                                  " handling") % self.op.node_name)
4356

    
4357
    # If we're being deofflined/drained, we'll MC ourself if needed
4358
    if (self.op.drained == False or self.op.offline == False or
4359
        (self.op.master_capable and not node.master_capable)):
4360
      if _DecideSelfPromotion(self):
4361
        self.op.master_candidate = True
4362
        self.LogInfo("Auto-promoting node to master candidate")
4363

    
4364
    # If we're no longer master capable, we'll demote ourselves from MC
4365
    if self.op.master_capable == False and node.master_candidate:
4366
      self.LogInfo("Demoting from master candidate")
4367
      self.op.master_candidate = False
4368

    
4369
    # Compute new role
4370
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4371
    if self.op.master_candidate:
4372
      new_role = self._ROLE_CANDIDATE
4373
    elif self.op.drained:
4374
      new_role = self._ROLE_DRAINED
4375
    elif self.op.offline:
4376
      new_role = self._ROLE_OFFLINE
4377
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4378
      # False is still in new flags, which means we're un-setting (the
4379
      # only) True flag
4380
      new_role = self._ROLE_REGULAR
4381
    else: # no new flags, nothing, keep old role
4382
      new_role = old_role
4383

    
4384
    self.new_role = new_role
4385

    
4386
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4387
      # Trying to transition out of offline status
4388
      result = self.rpc.call_version([node.name])[node.name]
4389
      if result.fail_msg:
4390
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4391
                                   " to report its version: %s" %
4392
                                   (node.name, result.fail_msg),
4393
                                   errors.ECODE_STATE)
4394
      else:
4395
        self.LogWarning("Transitioning node from offline to online state"
4396
                        " without using re-add. Please make sure the node"
4397
                        " is healthy!")
4398

    
4399
    if self.op.secondary_ip:
4400
      # Ok even without locking, because this can't be changed by any LU
4401
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4402
      master_singlehomed = master.secondary_ip == master.primary_ip
4403
      if master_singlehomed and self.op.secondary_ip:
4404
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4405
                                   " homed cluster", errors.ECODE_INVAL)
4406

    
4407
      if node.offline:
4408
        if self.affected_instances:
4409
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4410
                                     " node has instances (%s) configured"
4411
                                     " to use it" % self.affected_instances)
4412
      else:
4413
        # On online nodes, check that no instances are running, and that
4414
        # the node has the new ip and we can reach it.
4415
        for instance in self.affected_instances:
4416
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4417

    
4418
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4419
        if master.name != node.name:
4420
          # check reachability from master secondary ip to new secondary ip
4421
          if not netutils.TcpPing(self.op.secondary_ip,
4422
                                  constants.DEFAULT_NODED_PORT,
4423
                                  source=master.secondary_ip):
4424
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4425
                                       " based ping to node daemon port",
4426
                                       errors.ECODE_ENVIRON)
4427

    
4428
    if self.op.ndparams:
4429
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4430
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4431
      self.new_ndparams = new_ndparams
4432

    
4433
  def Exec(self, feedback_fn):
4434
    """Modifies a node.
4435

4436
    """
4437
    node = self.node
4438
    old_role = self.old_role
4439
    new_role = self.new_role
4440

    
4441
    result = []
4442

    
4443
    if self.op.ndparams:
4444
      node.ndparams = self.new_ndparams
4445

    
4446
    if self.op.powered is not None:
4447
      node.powered = self.op.powered
4448

    
4449
    for attr in ["master_capable", "vm_capable"]:
4450
      val = getattr(self.op, attr)
4451
      if val is not None:
4452
        setattr(node, attr, val)
4453
        result.append((attr, str(val)))
4454

    
4455
    if new_role != old_role:
4456
      # Tell the node to demote itself, if no longer MC and not offline
4457
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4458
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4459
        if msg:
4460
          self.LogWarning("Node failed to demote itself: %s", msg)
4461

    
4462
      new_flags = self._R2F[new_role]
4463
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4464
        if of != nf:
4465
          result.append((desc, str(nf)))
4466
      (node.master_candidate, node.drained, node.offline) = new_flags
4467

    
4468
      # if we locked all nodes, we adjust the candidate pool before
      # updating this node
4469
      if self.lock_all:
4470
        _AdjustCandidatePool(self, [node.name])
4471

    
4472
    if self.op.secondary_ip:
4473
      node.secondary_ip = self.op.secondary_ip
4474
      result.append(("secondary_ip", self.op.secondary_ip))
4475

    
4476
    # this will trigger configuration file update, if needed
4477
    self.cfg.Update(node, feedback_fn)
4478

    
4479
    # this will trigger job queue propagation or cleanup if the mc
4480
    # flag changed
4481
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4482
      self.context.ReaddNode(node)
4483

    
4484
    return result
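    # The returned list of (name, new_value) pairs is what ends up in the
    # job result; for instance, offlining a master candidate node yields
    # entries such as [("master_candidate", "False"), ("offline", "True")]
    # (illustrative values only).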
4485

    
4486

    
4487
class LUPowercycleNode(NoHooksLU):
4488
  """Powercycles a node.
4489

4490
  """
4491
  REQ_BGL = False
4492

    
4493
  def CheckArguments(self):
4494
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4495
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4496
      raise errors.OpPrereqError("The node is the master and the force"
4497
                                 " parameter was not set",
4498
                                 errors.ECODE_INVAL)
4499

    
4500
  def ExpandNames(self):
4501
    """Locking for PowercycleNode.
4502

4503
    This is a last-resort option and shouldn't block on other
4504
    jobs. Therefore, we grab no locks.
4505

4506
    """
4507
    self.needed_locks = {}
4508

    
4509
  def Exec(self, feedback_fn):
4510
    """Reboots a node.
4511

4512
    """
4513
    result = self.rpc.call_node_powercycle(self.op.node_name,
4514
                                           self.cfg.GetHypervisorType())
4515
    result.Raise("Failed to schedule the reboot")
4516
    return result.payload
4517

    
4518

    
4519
class LUQueryClusterInfo(NoHooksLU):
4520
  """Query cluster configuration.
4521

4522
  """
4523
  REQ_BGL = False
4524

    
4525
  def ExpandNames(self):
4526
    self.needed_locks = {}
4527

    
4528
  def Exec(self, feedback_fn):
4529
    """Return cluster config.
4530

4531
    """
4532
    cluster = self.cfg.GetClusterInfo()
4533
    os_hvp = {}
4534

    
4535
    # Filter just for enabled hypervisors
4536
    for os_name, hv_dict in cluster.os_hvp.items():
4537
      os_hvp[os_name] = {}
4538
      for hv_name, hv_params in hv_dict.items():
4539
        if hv_name in cluster.enabled_hypervisors:
4540
          os_hvp[os_name][hv_name] = hv_params
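    # Example shape after filtering (hypothetical values): with only "kvm"
    # enabled, {"debian-image": {"kvm": {...}, "xen-pvm": {...}}} is
    # reduced to {"debian-image": {"kvm": {...}}}.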
4541

    
4542
    # Convert ip_family to ip_version
4543
    primary_ip_version = constants.IP4_VERSION
4544
    if cluster.primary_ip_family == netutils.IP6Address.family:
4545
      primary_ip_version = constants.IP6_VERSION
4546

    
4547
    result = {
4548
      "software_version": constants.RELEASE_VERSION,
4549
      "protocol_version": constants.PROTOCOL_VERSION,
4550
      "config_version": constants.CONFIG_VERSION,
4551
      "os_api_version": max(constants.OS_API_VERSIONS),
4552
      "export_version": constants.EXPORT_VERSION,
4553
      "architecture": (platform.architecture()[0], platform.machine()),
4554
      "name": cluster.cluster_name,
4555
      "master": cluster.master_node,
4556
      "default_hypervisor": cluster.enabled_hypervisors[0],
4557
      "enabled_hypervisors": cluster.enabled_hypervisors,
4558
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4559
                        for hypervisor_name in cluster.enabled_hypervisors]),
4560
      "os_hvp": os_hvp,
4561
      "beparams": cluster.beparams,
4562
      "osparams": cluster.osparams,
4563
      "nicparams": cluster.nicparams,
4564
      "ndparams": cluster.ndparams,
4565
      "candidate_pool_size": cluster.candidate_pool_size,
4566
      "master_netdev": cluster.master_netdev,
4567
      "volume_group_name": cluster.volume_group_name,
4568
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4569
      "file_storage_dir": cluster.file_storage_dir,
4570
      "maintain_node_health": cluster.maintain_node_health,
4571
      "ctime": cluster.ctime,
4572
      "mtime": cluster.mtime,
4573
      "uuid": cluster.uuid,
4574
      "tags": list(cluster.GetTags()),
4575
      "uid_pool": cluster.uid_pool,
4576
      "default_iallocator": cluster.default_iallocator,
4577
      "reserved_lvs": cluster.reserved_lvs,
4578
      "primary_ip_version": primary_ip_version,
4579
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4580
      }
4581

    
4582
    return result
4583

    
4584

    
4585
class LUQueryConfigValues(NoHooksLU):
4586
  """Return configuration values.
4587

4588
  """
4589
  REQ_BGL = False
4590
  _FIELDS_DYNAMIC = utils.FieldSet()
4591
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4592
                                  "watcher_pause", "volume_group_name")
4593

    
4594
  def CheckArguments(self):
4595
    _CheckOutputFields(static=self._FIELDS_STATIC,
4596
                       dynamic=self._FIELDS_DYNAMIC,
4597
                       selected=self.op.output_fields)
4598

    
4599
  def ExpandNames(self):
4600
    self.needed_locks = {}
4601

    
4602
  def Exec(self, feedback_fn):
4603
    """Dump a representation of the cluster config to the standard output.
4604

4605
    """
4606
    values = []
4607
    for field in self.op.output_fields:
4608
      if field == "cluster_name":
4609
        entry = self.cfg.GetClusterName()
4610
      elif field == "master_node":
4611
        entry = self.cfg.GetMasterNode()
4612
      elif field == "drain_flag":
4613
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4614
      elif field == "watcher_pause":
4615
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4616
      elif field == "volume_group_name":
4617
        entry = self.cfg.GetVGName()
4618
      else:
4619
        raise errors.ParameterError(field)
4620
      values.append(entry)
4621
    return values
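    # For illustration: requesting output_fields ["cluster_name",
    # "drain_flag"] returns a two-element list with the values in the same
    # order, e.g. ["cluster.example.com", False] (values are hypothetical).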
4622

    
4623

    
4624
class LUActivateInstanceDisks(NoHooksLU):
4625
  """Bring up an instance's disks.
4626

4627
  """
4628
  REQ_BGL = False
4629

    
4630
  def ExpandNames(self):
4631
    self._ExpandAndLockInstance()
4632
    self.needed_locks[locking.LEVEL_NODE] = []
4633
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4634

    
4635
  def DeclareLocks(self, level):
4636
    if level == locking.LEVEL_NODE:
4637
      self._LockInstancesNodes()
4638

    
4639
  def CheckPrereq(self):
4640
    """Check prerequisites.
4641

4642
    This checks that the instance is in the cluster.
4643

4644
    """
4645
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4646
    assert self.instance is not None, \
4647
      "Cannot retrieve locked instance %s" % self.op.instance_name
4648
    _CheckNodeOnline(self, self.instance.primary_node)
4649

    
4650
  def Exec(self, feedback_fn):
4651
    """Activate the disks.
4652

4653
    """
4654
    disks_ok, disks_info = \
4655
              _AssembleInstanceDisks(self, self.instance,
4656
                                     ignore_size=self.op.ignore_size)
4657
    if not disks_ok:
4658
      raise errors.OpExecError("Cannot activate block devices")
4659

    
4660
    return disks_info
4661

    
4662

    
4663
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4664
                           ignore_size=False):
4665
  """Prepare the block devices for an instance.
4666

4667
  This sets up the block devices on all nodes.
4668

4669
  @type lu: L{LogicalUnit}
4670
  @param lu: the logical unit on whose behalf we execute
4671
  @type instance: L{objects.Instance}
4672
  @param instance: the instance for whose disks we assemble
4673
  @type disks: list of L{objects.Disk} or None
4674
  @param disks: which disks to assemble (or all, if None)
4675
  @type ignore_secondaries: boolean
4676
  @param ignore_secondaries: if true, errors on secondary nodes
4677
      won't result in an error return from the function
4678
  @type ignore_size: boolean
4679
  @param ignore_size: if true, the current known size of the disk
4680
      will not be used during the disk activation, useful for cases
4681
      when the size is wrong
4682
  @return: a (disks_ok, device_info) tuple; disks_ok is False if the
4683
      operation failed, and device_info is a list of
4684
      (host, instance_visible_name, node_visible_name) tuples with
      the mapping from node devices to instance devices
4685

4686
  """
4687
  device_info = []
4688
  disks_ok = True
4689
  iname = instance.name
4690
  disks = _ExpandCheckDisks(instance, disks)
4691

    
4692
  # With the two passes mechanism we try to reduce the window of
4693
  # opportunity for the race condition of switching DRBD to primary
4694
  # before handshaking occurred, but we do not eliminate it
4695

    
4696
  # The proper fix would be to wait (with some limits) until the
4697
  # connection has been made and drbd transitions from WFConnection
4698
  # into any other network-connected state (Connected, SyncTarget,
4699
  # SyncSource, etc.)
4700

    
4701
  # 1st pass, assemble on all nodes in secondary mode
4702
  for inst_disk in disks:
4703
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4704
      if ignore_size:
4705
        node_disk = node_disk.Copy()
4706
        node_disk.UnsetSize()
4707
      lu.cfg.SetDiskID(node_disk, node)
4708
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4709
      msg = result.fail_msg
4710
      if msg:
4711
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4712
                           " (is_primary=False, pass=1): %s",
4713
                           inst_disk.iv_name, node, msg)
4714
        if not ignore_secondaries:
4715
          disks_ok = False
4716

    
4717
  # FIXME: race condition on drbd migration to primary
4718

    
4719
  # 2nd pass, do only the primary node
4720
  for inst_disk in disks:
4721
    dev_path = None
4722

    
4723
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4724
      if node != instance.primary_node:
4725
        continue
4726
      if ignore_size:
4727
        node_disk = node_disk.Copy()
4728
        node_disk.UnsetSize()
4729
      lu.cfg.SetDiskID(node_disk, node)
4730
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4731
      msg = result.fail_msg
4732
      if msg:
4733
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4734
                           " (is_primary=True, pass=2): %s",
4735
                           inst_disk.iv_name, node, msg)
4736
        disks_ok = False
4737
      else:
4738
        dev_path = result.payload
4739

    
4740
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4741

    
4742
  # leave the disks configured for the primary node
4743
  # this is a workaround that would be fixed better by
4744
  # improving the logical/physical id handling
4745
  for disk in disks:
4746
    lu.cfg.SetDiskID(disk, instance.primary_node)
4747

    
4748
  return disks_ok, device_info
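# Typical call pattern (as used by LUActivateInstanceDisks above): unpack the
# tuple and treat a False first element as a hard failure, e.g.
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")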
4749

    
4750

    
4751
def _StartInstanceDisks(lu, instance, force):
4752
  """Start the disks of an instance.
4753

4754
  """
4755
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4756
                                           ignore_secondaries=force)
4757
  if not disks_ok:
4758
    _ShutdownInstanceDisks(lu, instance)
4759
    if force is not None and not force:
4760
      lu.proc.LogWarning("", hint="If the message above refers to a"
4761
                         " secondary node,"
4762
                         " you can retry the operation using '--force'.")
4763
    raise errors.OpExecError("Disk consistency error")
4764

    
4765

    
4766
class LUDeactivateInstanceDisks(NoHooksLU):
4767
  """Shutdown an instance's disks.
4768

4769
  """
4770
  REQ_BGL = False
4771

    
4772
  def ExpandNames(self):
4773
    self._ExpandAndLockInstance()
4774
    self.needed_locks[locking.LEVEL_NODE] = []
4775
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4776

    
4777
  def DeclareLocks(self, level):
4778
    if level == locking.LEVEL_NODE:
4779
      self._LockInstancesNodes()
4780

    
4781
  def CheckPrereq(self):
4782
    """Check prerequisites.
4783

4784
    This checks that the instance is in the cluster.
4785

4786
    """
4787
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4788
    assert self.instance is not None, \
4789
      "Cannot retrieve locked instance %s" % self.op.instance_name
4790

    
4791
  def Exec(self, feedback_fn):
4792
    """Deactivate the disks
4793

4794
    """
4795
    instance = self.instance
4796
    _SafeShutdownInstanceDisks(self, instance)
4797

    
4798

    
4799
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4800
  """Shutdown block devices of an instance.
4801

4802
  This function checks if an instance is running, before calling
4803
  _ShutdownInstanceDisks.
4804

4805
  """
4806
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4807
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4808

    
4809

    
4810
def _ExpandCheckDisks(instance, disks):
4811
  """Return the instance disks selected by the disks list
4812

4813
  @type disks: list of L{objects.Disk} or None
4814
  @param disks: selected disks
4815
  @rtype: list of L{objects.Disk}
4816
  @return: selected instance disks to act on
4817

4818
  """
4819
  if disks is None:
4820
    return instance.disks
4821
  else:
4822
    if not set(disks).issubset(instance.disks):
4823
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4824
                                   " target instance")
4825
    return disks
4826

    
4827

    
4828
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4829
  """Shutdown block devices of an instance.
4830

4831
  This does the shutdown on all nodes of the instance.
4832

4833
  If ignore_primary is true, errors on the primary node are ignored;
4834
  otherwise they cause the shutdown to be reported as failed.
4835

4836
  """
4837
  all_result = True
4838
  disks = _ExpandCheckDisks(instance, disks)
4839

    
4840
  for disk in disks:
4841
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4842
      lu.cfg.SetDiskID(top_disk, node)
4843
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4844
      msg = result.fail_msg
4845
      if msg:
4846
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4847
                      disk.iv_name, node, msg)
4848
        if ((node == instance.primary_node and not ignore_primary) or
4849
            (node != instance.primary_node and not result.offline)):
4850
          all_result = False
4851
  return all_result
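# Note: with ignore_primary=True (as used by LUFailoverInstance below), a
# shutdown error on the primary node is logged but does not make the
# function return False; errors on online secondary nodes always do.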
4852

    
4853

    
4854
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4855
  """Checks if a node has enough free memory.
4856

4857
  This function checks if a given node has the needed amount of free
4858
  memory. In case the node has less memory or we cannot get the
4859
  information from the node, this function raises an OpPrereqError
4860
  exception.
4861

4862
  @type lu: C{LogicalUnit}
4863
  @param lu: a logical unit from which we get configuration data
4864
  @type node: C{str}
4865
  @param node: the node to check
4866
  @type reason: C{str}
4867
  @param reason: string to use in the error message
4868
  @type requested: C{int}
4869
  @param requested: the amount of memory in MiB to check for
4870
  @type hypervisor_name: C{str}
4871
  @param hypervisor_name: the hypervisor to ask for memory stats
4872
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4873
      we cannot check the node
4874

4875
  """
4876
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4877
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4878
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4879
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4880
  if not isinstance(free_mem, int):
4881
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4882
                               " was '%s'" % (node, free_mem),
4883
                               errors.ECODE_ENVIRON)
4884
  if requested > free_mem:
4885
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4886
                               " needed %s MiB, available %s MiB" %
4887
                               (node, reason, requested, free_mem),
4888
                               errors.ECODE_NORES)
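# Usage sketch (mirroring LUStartupInstance.CheckPrereq below), with the
# requested amount taken from the filled backend parameters in MiB:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)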
4889

    
4890

    
4891
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4892
  """Checks if nodes have enough free disk space in the all VGs.
4893

4894
  This function checks if all given nodes have the needed amount of
4895
  free disk. In case any node has less disk or we cannot get the
4896
  information from the node, this function raises an OpPrereqError
4897
  exception.
4898

4899
  @type lu: C{LogicalUnit}
4900
  @param lu: a logical unit from which we get configuration data
4901
  @type nodenames: C{list}
4902
  @param nodenames: the list of node names to check
4903
  @type req_sizes: C{dict}
4904
  @param req_sizes: the hash of vg and corresponding amount of disk in
4905
      MiB to check for
4906
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4907
      or we cannot check the node
4908

4909
  """
4910
  if req_sizes is not None:
4911
    for vg, req_size in req_sizes.iteritems():
4912
      _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
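# For illustration, a req_sizes of {"xenvg": 10240} asks every node in
# nodenames for 10 GiB of free space in volume group "xenvg"; the VG name
# used here is only an example.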
4913

    
4914

    
4915
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4916
  """Checks if nodes have enough free disk space in the specified VG.
4917

4918
  This function checks if all given nodes have the needed amount of
4919
  free disk. In case any node has less disk or we cannot get the
4920
  information from the node, this function raises an OpPrereqError
4921
  exception.
4922

4923
  @type lu: C{LogicalUnit}
4924
  @param lu: a logical unit from which we get configuration data
4925
  @type nodenames: C{list}
4926
  @param nodenames: the list of node names to check
4927
  @type vg: C{str}
4928
  @param vg: the volume group to check
4929
  @type requested: C{int}
4930
  @param requested: the amount of disk in MiB to check for
4931
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4932
      or we cannot check the node
4933

4934
  """
4935
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
4936
  for node in nodenames:
4937
    info = nodeinfo[node]
4938
    info.Raise("Cannot get current information from node %s" % node,
4939
               prereq=True, ecode=errors.ECODE_ENVIRON)
4940
    vg_free = info.payload.get("vg_free", None)
4941
    if not isinstance(vg_free, int):
4942
      raise errors.OpPrereqError("Can't compute free disk space on node"
4943
                                 " %s for vg %s, result was '%s'" %
4944
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
4945
    if requested > vg_free:
4946
      raise errors.OpPrereqError("Not enough disk space on target node %s"
4947
                                 " vg %s: required %d MiB, available %d MiB" %
4948
                                 (node, vg, requested, vg_free),
4949
                                 errors.ECODE_NORES)
4950

    
4951

    
4952
class LUStartupInstance(LogicalUnit):
4953
  """Starts an instance.
4954

4955
  """
4956
  HPATH = "instance-start"
4957
  HTYPE = constants.HTYPE_INSTANCE
4958
  REQ_BGL = False
4959

    
4960
  def CheckArguments(self):
4961
    # extra beparams
4962
    if self.op.beparams:
4963
      # fill the beparams dict
4964
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
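    # For example, a start request overriding only the memory size would
    # pass something like {constants.BE_MEMORY: 1024} here (the value is
    # hypothetical); ForceDictType only validates the types, and the dict
    # is later handed to call_instance_start in Exec.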
4965

    
4966
  def ExpandNames(self):
4967
    self._ExpandAndLockInstance()
4968

    
4969
  def BuildHooksEnv(self):
4970
    """Build hooks env.
4971

4972
    This runs on master, primary and secondary nodes of the instance.
4973

4974
    """
4975
    env = {
4976
      "FORCE": self.op.force,
4977
      }
4978
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4979
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4980
    return env, nl, nl
4981

    
4982
  def CheckPrereq(self):
4983
    """Check prerequisites.
4984

4985
    This checks that the instance is in the cluster.
4986

4987
    """
4988
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4989
    assert self.instance is not None, \
4990
      "Cannot retrieve locked instance %s" % self.op.instance_name
4991

    
4992
    # extra hvparams
4993
    if self.op.hvparams:
4994
      # check hypervisor parameter syntax (locally)
4995
      cluster = self.cfg.GetClusterInfo()
4996
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4997
      filled_hvp = cluster.FillHV(instance)
4998
      filled_hvp.update(self.op.hvparams)
4999
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5000
      hv_type.CheckParameterSyntax(filled_hvp)
5001
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5002

    
5003
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5004

    
5005
    if self.primary_offline and self.op.ignore_offline_nodes:
5006
      self.proc.LogWarning("Ignoring offline primary node")
5007

    
5008
      if self.op.hvparams or self.op.beparams:
5009
        self.proc.LogWarning("Overridden parameters are ignored")
5010
    else:
5011
      _CheckNodeOnline(self, instance.primary_node)
5012

    
5013
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5014

    
5015
      # check bridges existence
5016
      _CheckInstanceBridgesExist(self, instance)
5017

    
5018
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5019
                                                instance.name,
5020
                                                instance.hypervisor)
5021
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5022
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5023
      if not remote_info.payload: # not running already
5024
        _CheckNodeFreeMemory(self, instance.primary_node,
5025
                             "starting instance %s" % instance.name,
5026
                             bep[constants.BE_MEMORY], instance.hypervisor)
5027

    
5028
  def Exec(self, feedback_fn):
5029
    """Start the instance.
5030

5031
    """
5032
    instance = self.instance
5033
    force = self.op.force
5034

    
5035
    self.cfg.MarkInstanceUp(instance.name)
5036

    
5037
    if self.primary_offline:
5038
      assert self.op.ignore_offline_nodes
5039
      self.proc.LogInfo("Primary node offline, marked instance as started")
5040
    else:
5041
      node_current = instance.primary_node
5042

    
5043
      _StartInstanceDisks(self, instance, force)
5044

    
5045
      result = self.rpc.call_instance_start(node_current, instance,
5046
                                            self.op.hvparams, self.op.beparams)
5047
      msg = result.fail_msg
5048
      if msg:
5049
        _ShutdownInstanceDisks(self, instance)
5050
        raise errors.OpExecError("Could not start instance: %s" % msg)
5051

    
5052

    
5053
class LURebootInstance(LogicalUnit):
5054
  """Reboot an instance.
5055

5056
  """
5057
  HPATH = "instance-reboot"
5058
  HTYPE = constants.HTYPE_INSTANCE
5059
  REQ_BGL = False
5060

    
5061
  def ExpandNames(self):
5062
    self._ExpandAndLockInstance()
5063

    
5064
  def BuildHooksEnv(self):
5065
    """Build hooks env.
5066

5067
    This runs on master, primary and secondary nodes of the instance.
5068

5069
    """
5070
    env = {
5071
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5072
      "REBOOT_TYPE": self.op.reboot_type,
5073
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5074
      }
5075
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5076
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5077
    return env, nl, nl
5078

    
5079
  def CheckPrereq(self):
5080
    """Check prerequisites.
5081

5082
    This checks that the instance is in the cluster.
5083

5084
    """
5085
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5086
    assert self.instance is not None, \
5087
      "Cannot retrieve locked instance %s" % self.op.instance_name
5088

    
5089
    _CheckNodeOnline(self, instance.primary_node)
5090

    
5091
    # check bridges existence
5092
    _CheckInstanceBridgesExist(self, instance)
5093

    
5094
  def Exec(self, feedback_fn):
5095
    """Reboot the instance.
5096

5097
    """
5098
    instance = self.instance
5099
    ignore_secondaries = self.op.ignore_secondaries
5100
    reboot_type = self.op.reboot_type
5101

    
5102
    node_current = instance.primary_node
5103

    
5104
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5105
                       constants.INSTANCE_REBOOT_HARD]:
5106
      for disk in instance.disks:
5107
        self.cfg.SetDiskID(disk, node_current)
5108
      result = self.rpc.call_instance_reboot(node_current, instance,
5109
                                             reboot_type,
5110
                                             self.op.shutdown_timeout)
5111
      result.Raise("Could not reboot instance")
5112
    else:
5113
      result = self.rpc.call_instance_shutdown(node_current, instance,
5114
                                               self.op.shutdown_timeout)
5115
      result.Raise("Could not shutdown instance for full reboot")
5116
      _ShutdownInstanceDisks(self, instance)
5117
      _StartInstanceDisks(self, instance, ignore_secondaries)
5118
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5119
      msg = result.fail_msg
5120
      if msg:
5121
        _ShutdownInstanceDisks(self, instance)
5122
        raise errors.OpExecError("Could not start instance for"
5123
                                 " full reboot: %s" % msg)
5124

    
5125
    self.cfg.MarkInstanceUp(instance.name)
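    # Summary of the branches above: soft and hard reboots are delegated to
    # the node daemon via call_instance_reboot, while any other accepted
    # reboot type (a full reboot) is emulated as shutdown, disk restart and
    # instance start.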
5126

    
5127

    
5128
class LUShutdownInstance(LogicalUnit):
5129
  """Shutdown an instance.
5130

5131
  """
5132
  HPATH = "instance-stop"
5133
  HTYPE = constants.HTYPE_INSTANCE
5134
  REQ_BGL = False
5135

    
5136
  def ExpandNames(self):
5137
    self._ExpandAndLockInstance()
5138

    
5139
  def BuildHooksEnv(self):
5140
    """Build hooks env.
5141

5142
    This runs on master, primary and secondary nodes of the instance.
5143

5144
    """
5145
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5146
    env["TIMEOUT"] = self.op.timeout
5147
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5148
    return env, nl, nl
5149

    
5150
  def CheckPrereq(self):
5151
    """Check prerequisites.
5152

5153
    This checks that the instance is in the cluster.
5154

5155
    """
5156
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5157
    assert self.instance is not None, \
5158
      "Cannot retrieve locked instance %s" % self.op.instance_name
5159

    
5160
    self.primary_offline = \
5161
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5162

    
5163
    if self.primary_offline and self.op.ignore_offline_nodes:
5164
      self.proc.LogWarning("Ignoring offline primary node")
5165
    else:
5166
      _CheckNodeOnline(self, self.instance.primary_node)
5167

    
5168
  def Exec(self, feedback_fn):
5169
    """Shutdown the instance.
5170

5171
    """
5172
    instance = self.instance
5173
    node_current = instance.primary_node
5174
    timeout = self.op.timeout
5175

    
5176
    self.cfg.MarkInstanceDown(instance.name)
5177

    
5178
    if self.primary_offline:
5179
      assert self.op.ignore_offline_nodes
5180
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5181
    else:
5182
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5183
      msg = result.fail_msg
5184
      if msg:
5185
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5186

    
5187
      _ShutdownInstanceDisks(self, instance)
5188

    
5189

    
5190
class LUReinstallInstance(LogicalUnit):
5191
  """Reinstall an instance.
5192

5193
  """
5194
  HPATH = "instance-reinstall"
5195
  HTYPE = constants.HTYPE_INSTANCE
5196
  REQ_BGL = False
5197

    
5198
  def ExpandNames(self):
5199
    self._ExpandAndLockInstance()
5200

    
5201
  def BuildHooksEnv(self):
5202
    """Build hooks env.
5203

5204
    This runs on master, primary and secondary nodes of the instance.
5205

5206
    """
5207
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5208
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5209
    return env, nl, nl
5210

    
5211
  def CheckPrereq(self):
5212
    """Check prerequisites.
5213

5214
    This checks that the instance is in the cluster and is not running.
5215

5216
    """
5217
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5218
    assert instance is not None, \
5219
      "Cannot retrieve locked instance %s" % self.op.instance_name
5220
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5221
                     " offline, cannot reinstall")
5222
    for node in instance.secondary_nodes:
5223
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5224
                       " cannot reinstall")
5225

    
5226
    if instance.disk_template == constants.DT_DISKLESS:
5227
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5228
                                 self.op.instance_name,
5229
                                 errors.ECODE_INVAL)
5230
    _CheckInstanceDown(self, instance, "cannot reinstall")
5231

    
5232
    if self.op.os_type is not None:
5233
      # OS verification
5234
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5235
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5236
      instance_os = self.op.os_type
5237
    else:
5238
      instance_os = instance.os
5239

    
5240
    nodelist = list(instance.all_nodes)
5241

    
5242
    if self.op.osparams:
5243
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5244
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5245
      self.os_inst = i_osdict # the new dict (without defaults)
5246
    else:
5247
      self.os_inst = None
5248

    
5249
    self.instance = instance
5250

    
5251
  def Exec(self, feedback_fn):
5252
    """Reinstall the instance.
5253

5254
    """
5255
    inst = self.instance
5256

    
5257
    if self.op.os_type is not None:
5258
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5259
      inst.os = self.op.os_type
5260
      # Write to configuration
5261
      self.cfg.Update(inst, feedback_fn)
5262

    
5263
    _StartInstanceDisks(self, inst, None)
5264
    try:
5265
      feedback_fn("Running the instance OS create scripts...")
5266
      # FIXME: pass debug option from opcode to backend
5267
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5268
                                             self.op.debug_level,
5269
                                             osparams=self.os_inst)
5270
      result.Raise("Could not install OS for instance %s on node %s" %
5271
                   (inst.name, inst.primary_node))
5272
    finally:
5273
      _ShutdownInstanceDisks(self, inst)
5274

    
5275

    
5276
class LURecreateInstanceDisks(LogicalUnit):
5277
  """Recreate an instance's missing disks.
5278

5279
  """
5280
  HPATH = "instance-recreate-disks"
5281
  HTYPE = constants.HTYPE_INSTANCE
5282
  REQ_BGL = False
5283

    
5284
  def ExpandNames(self):
5285
    self._ExpandAndLockInstance()
5286

    
5287
  def BuildHooksEnv(self):
5288
    """Build hooks env.
5289

5290
    This runs on master, primary and secondary nodes of the instance.
5291

5292
    """
5293
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5294
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5295
    return env, nl, nl
5296

    
5297
  def CheckPrereq(self):
5298
    """Check prerequisites.
5299

5300
    This checks that the instance is in the cluster and is not running.
5301

5302
    """
5303
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5304
    assert instance is not None, \
5305
      "Cannot retrieve locked instance %s" % self.op.instance_name
5306
    _CheckNodeOnline(self, instance.primary_node)
5307

    
5308
    if instance.disk_template == constants.DT_DISKLESS:
5309
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5310
                                 self.op.instance_name, errors.ECODE_INVAL)
5311
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5312

    
5313
    if not self.op.disks:
5314
      self.op.disks = range(len(instance.disks))
5315
    else:
5316
      for idx in self.op.disks:
5317
        if idx >= len(instance.disks):
5318
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5319
                                     errors.ECODE_INVAL)
5320

    
5321
    self.instance = instance
5322

    
5323
  def Exec(self, feedback_fn):
5324
    """Recreate the disks.
5325

5326
    """
5327
    to_skip = []
5328
    for idx, _ in enumerate(self.instance.disks):
5329
      if idx not in self.op.disks: # disk idx has not been passed in
5330
        to_skip.append(idx)
5331
        continue
5332

    
5333
    _CreateDisks(self, self.instance, to_skip=to_skip)
5334

    
5335

    
5336
class LURenameInstance(LogicalUnit):
5337
  """Rename an instance.
5338

5339
  """
5340
  HPATH = "instance-rename"
5341
  HTYPE = constants.HTYPE_INSTANCE
5342

    
5343
  def CheckArguments(self):
5344
    """Check arguments.
5345

5346
    """
5347
    if self.op.ip_check and not self.op.name_check:
5348
      # TODO: make the ip check more flexible and not depend on the name check
5349
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5350
                                 errors.ECODE_INVAL)
5351

    
5352
  def BuildHooksEnv(self):
5353
    """Build hooks env.
5354

5355
    This runs on master, primary and secondary nodes of the instance.
5356

5357
    """
5358
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5359
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5360
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5361
    return env, nl, nl
5362

    
5363
  def CheckPrereq(self):
5364
    """Check prerequisites.
5365

5366
    This checks that the instance is in the cluster and is not running.
5367

5368
    """
5369
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5370
                                                self.op.instance_name)
5371
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5372
    assert instance is not None
5373
    _CheckNodeOnline(self, instance.primary_node)
5374
    _CheckInstanceDown(self, instance, "cannot rename")
5375
    self.instance = instance
5376

    
5377
    new_name = self.op.new_name
5378
    if self.op.name_check:
5379
      hostname = netutils.GetHostname(name=new_name)
5380
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5381
                   hostname.name)
5382
      new_name = self.op.new_name = hostname.name
5383
      if (self.op.ip_check and
5384
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5385
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5386
                                   (hostname.ip, new_name),
5387
                                   errors.ECODE_NOTUNIQUE)
5388

    
5389
    instance_list = self.cfg.GetInstanceList()
5390
    if new_name in instance_list and new_name != instance.name:
5391
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5392
                                 new_name, errors.ECODE_EXISTS)
5393

    
5394
  def Exec(self, feedback_fn):
5395
    """Rename the instance.
5396

5397
    """
5398
    inst = self.instance
5399
    old_name = inst.name
5400

    
5401
    rename_file_storage = False
5402
    if (inst.disk_template == constants.DT_FILE and
5403
        self.op.new_name != inst.name):
5404
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5405
      rename_file_storage = True
5406

    
5407
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5408
    # Change the instance lock. This is definitely safe while we hold the BGL
5409
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5410
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5411

    
5412
    # re-read the instance from the configuration after rename
5413
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5414

    
5415
    if rename_file_storage:
5416
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5417
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5418
                                                     old_file_storage_dir,
5419
                                                     new_file_storage_dir)
5420
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5421
                   " (but the instance has been renamed in Ganeti)" %
5422
                   (inst.primary_node, old_file_storage_dir,
5423
                    new_file_storage_dir))
5424

    
5425
    _StartInstanceDisks(self, inst, None)
5426
    try:
5427
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5428
                                                 old_name, self.op.debug_level)
5429
      msg = result.fail_msg
5430
      if msg:
5431
        msg = ("Could not run OS rename script for instance %s on node %s"
5432
               " (but the instance has been renamed in Ganeti): %s" %
5433
               (inst.name, inst.primary_node, msg))
5434
        self.proc.LogWarning(msg)
5435
    finally:
5436
      _ShutdownInstanceDisks(self, inst)
5437

    
5438
    return inst.name
5439

    
5440

    
5441
class LURemoveInstance(LogicalUnit):
5442
  """Remove an instance.
5443

5444
  """
5445
  HPATH = "instance-remove"
5446
  HTYPE = constants.HTYPE_INSTANCE
5447
  REQ_BGL = False
5448

    
5449
  def ExpandNames(self):
5450
    self._ExpandAndLockInstance()
5451
    self.needed_locks[locking.LEVEL_NODE] = []
5452
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5453

    
5454
  def DeclareLocks(self, level):
5455
    if level == locking.LEVEL_NODE:
5456
      self._LockInstancesNodes()
5457

    
5458
  def BuildHooksEnv(self):
5459
    """Build hooks env.
5460

5461
    This runs on master, primary and secondary nodes of the instance.
5462

5463
    """
5464
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5465
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5466
    nl = [self.cfg.GetMasterNode()]
5467
    nl_post = list(self.instance.all_nodes) + nl
5468
    return env, nl, nl_post
5469

    
5470
  def CheckPrereq(self):
5471
    """Check prerequisites.
5472

5473
    This checks that the instance is in the cluster.
5474

5475
    """
5476
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5477
    assert self.instance is not None, \
5478
      "Cannot retrieve locked instance %s" % self.op.instance_name
5479

    
5480
  def Exec(self, feedback_fn):
5481
    """Remove the instance.
5482

5483
    """
5484
    instance = self.instance
5485
    logging.info("Shutting down instance %s on node %s",
5486
                 instance.name, instance.primary_node)
5487

    
5488
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5489
                                             self.op.shutdown_timeout)
5490
    msg = result.fail_msg
5491
    if msg:
5492
      if self.op.ignore_failures:
5493
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5494
      else:
5495
        raise errors.OpExecError("Could not shutdown instance %s on"
5496
                                 " node %s: %s" %
5497
                                 (instance.name, instance.primary_node, msg))
5498

    
5499
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5500

    
5501

    
5502
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5503
  """Utility function to remove an instance.
5504

5505
  """
5506
  logging.info("Removing block devices for instance %s", instance.name)
5507

    
5508
  if not _RemoveDisks(lu, instance):
5509
    if not ignore_failures:
5510
      raise errors.OpExecError("Can't remove instance's disks")
5511
    feedback_fn("Warning: can't remove instance's disks")
5512

    
5513
  logging.info("Removing instance %s out of cluster config", instance.name)
5514

    
5515
  lu.cfg.RemoveInstance(instance.name)
5516

    
5517
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5518
    "Instance lock removal conflict"
5519

    
5520
  # Remove lock for the instance
5521
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5522

    
5523

    
5524
class LUQueryInstances(NoHooksLU):
5525
  """Logical unit for querying instances.
5526

5527
  """
5528
  # pylint: disable-msg=W0142
5529
  REQ_BGL = False
5530

    
5531
  def CheckArguments(self):
5532
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5533
                             self.op.use_locking)
5534

    
5535
  def ExpandNames(self):
5536
    self.iq.ExpandNames(self)
5537

    
5538
  def DeclareLocks(self, level):
5539
    self.iq.DeclareLocks(self, level)
5540

    
5541
  def Exec(self, feedback_fn):
5542
    return self.iq.OldStyleQuery(self)
5543

    
5544

    
5545
class LUFailoverInstance(LogicalUnit):
5546
  """Failover an instance.
5547

5548
  """
5549
  HPATH = "instance-failover"
5550
  HTYPE = constants.HTYPE_INSTANCE
5551
  REQ_BGL = False
5552

    
5553
  def ExpandNames(self):
5554
    self._ExpandAndLockInstance()
5555
    self.needed_locks[locking.LEVEL_NODE] = []
5556
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5557

    
5558
  def DeclareLocks(self, level):
5559
    if level == locking.LEVEL_NODE:
5560
      self._LockInstancesNodes()
5561

    
5562
  def BuildHooksEnv(self):
5563
    """Build hooks env.
5564

5565
    This runs on master, primary and secondary nodes of the instance.
5566

5567
    """
5568
    instance = self.instance
5569
    source_node = instance.primary_node
5570
    target_node = instance.secondary_nodes[0]
5571
    env = {
5572
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5573
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5574
      "OLD_PRIMARY": source_node,
5575
      "OLD_SECONDARY": target_node,
5576
      "NEW_PRIMARY": target_node,
5577
      "NEW_SECONDARY": source_node,
5578
      }
5579
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5580
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5581
    nl_post = list(nl)
5582
    nl_post.append(source_node)
5583
    return env, nl, nl_post
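    # For a hypothetical instance with primary node "node1" and secondary
    # "node2", the hook environment above reports OLD_PRIMARY=node1,
    # OLD_SECONDARY=node2, NEW_PRIMARY=node2 and NEW_SECONDARY=node1,
    # i.e. the two roles are simply swapped.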
5584

    
5585
  def CheckPrereq(self):
5586
    """Check prerequisites.
5587

5588
    This checks that the instance is in the cluster.
5589

5590
    """
5591
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5592
    assert self.instance is not None, \
5593
      "Cannot retrieve locked instance %s" % self.op.instance_name
5594

    
5595
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5596
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5597
      raise errors.OpPrereqError("Instance's disk layout is not"
5598
                                 " network mirrored, cannot failover.",
5599
                                 errors.ECODE_STATE)
5600

    
5601
    secondary_nodes = instance.secondary_nodes
5602
    if not secondary_nodes:
5603
      raise errors.ProgrammerError("no secondary node but using "
5604
                                   "a mirrored disk template")
5605

    
5606
    target_node = secondary_nodes[0]
5607
    _CheckNodeOnline(self, target_node)
5608
    _CheckNodeNotDrained(self, target_node)
5609
    if instance.admin_up:
5610
      # check memory requirements on the secondary node
5611
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5612
                           instance.name, bep[constants.BE_MEMORY],
5613
                           instance.hypervisor)
5614
    else:
5615
      self.LogInfo("Not checking memory on the secondary node as"
5616
                   " instance will not be started")
5617

    
5618
    # check bridge existence
5619
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5620

    
5621
  def Exec(self, feedback_fn):
5622
    """Failover an instance.
5623

5624
    The failover is done by shutting it down on its present node and
5625
    starting it on the secondary.
5626

5627
    """
5628
    instance = self.instance
5629
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5630

    
5631
    source_node = instance.primary_node
5632
    target_node = instance.secondary_nodes[0]
5633

    
5634
    if instance.admin_up:
5635
      feedback_fn("* checking disk consistency between source and target")
5636
      for dev in instance.disks:
5637
        # for drbd, these are drbd over lvm
5638
        if not _CheckDiskConsistency(self, dev, target_node, False):
5639
          if not self.op.ignore_consistency:
5640
            raise errors.OpExecError("Disk %s is degraded on target node,"
5641
                                     " aborting failover." % dev.iv_name)
5642
    else:
5643
      feedback_fn("* not checking disk consistency as instance is not running")
5644

    
5645
    feedback_fn("* shutting down instance on source node")
5646
    logging.info("Shutting down instance %s on node %s",
5647
                 instance.name, source_node)
5648

    
5649
    result = self.rpc.call_instance_shutdown(source_node, instance,
5650
                                             self.op.shutdown_timeout)
5651
    msg = result.fail_msg
5652
    if msg:
5653
      if self.op.ignore_consistency or primary_node.offline:
5654
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5655
                             " Proceeding anyway. Please make sure node"
5656
                             " %s is down. Error details: %s",
5657
                             instance.name, source_node, source_node, msg)
5658
      else:
5659
        raise errors.OpExecError("Could not shutdown instance %s on"
5660
                                 " node %s: %s" %
5661
                                 (instance.name, source_node, msg))
5662

    
5663
    feedback_fn("* deactivating the instance's disks on source node")
5664
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5665
      raise errors.OpExecError("Can't shut down the instance's disks.")
5666

    
5667
    instance.primary_node = target_node
5668
    # distribute new instance config to the other nodes
5669
    self.cfg.Update(instance, feedback_fn)
5670

    
5671
    # Only start the instance if it's marked as up
5672
    if instance.admin_up:
5673
      feedback_fn("* activating the instance's disks on target node")
5674
      logging.info("Starting instance %s on node %s",
5675
                   instance.name, target_node)
5676

    
5677
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5678
                                           ignore_secondaries=True)
5679
      if not disks_ok:
5680
        _ShutdownInstanceDisks(self, instance)
5681
        raise errors.OpExecError("Can't activate the instance's disks")
5682

    
5683
      feedback_fn("* starting the instance on the target node")
5684
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5685
      msg = result.fail_msg
5686
      if msg:
5687
        _ShutdownInstanceDisks(self, instance)
5688
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5689
                                 (instance.name, target_node, msg))
5690

    
5691

    
5692
class LUMigrateInstance(LogicalUnit):
5693
  """Migrate an instance.
5694

5695
  This is migration without shutting down, compared to the failover,
5696
  which is done with shutdown.
5697

5698
  """
5699
  HPATH = "instance-migrate"
5700
  HTYPE = constants.HTYPE_INSTANCE
5701
  REQ_BGL = False
5702

    
5703
  def ExpandNames(self):
5704
    self._ExpandAndLockInstance()
5705

    
5706
    self.needed_locks[locking.LEVEL_NODE] = []
5707
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5708

    
5709
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5710
                                       self.op.cleanup)
5711
    self.tasklets = [self._migrater]
5712

    
5713
  def DeclareLocks(self, level):
5714
    if level == locking.LEVEL_NODE:
5715
      self._LockInstancesNodes()
5716

    
5717
  def BuildHooksEnv(self):
5718
    """Build hooks env.
5719

5720
    This runs on master, primary and secondary nodes of the instance.
5721

5722
    """
5723
    instance = self._migrater.instance
5724
    source_node = instance.primary_node
5725
    target_node = instance.secondary_nodes[0]
5726
    env = _BuildInstanceHookEnvByObject(self, instance)
5727
    env["MIGRATE_LIVE"] = self._migrater.live
5728
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5729
    env.update({
5730
        "OLD_PRIMARY": source_node,
5731
        "OLD_SECONDARY": target_node,
5732
        "NEW_PRIMARY": target_node,
5733
        "NEW_SECONDARY": source_node,
5734
        })
5735
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5736
    nl_post = list(nl)
5737
    nl_post.append(source_node)
5738
    return env, nl, nl_post
5739

    
5740

    
5741
class LUMoveInstance(LogicalUnit):
5742
  """Move an instance by data-copying.
5743

5744
  """
5745
  HPATH = "instance-move"
5746
  HTYPE = constants.HTYPE_INSTANCE
5747
  REQ_BGL = False
5748

    
5749
  def ExpandNames(self):
5750
    self._ExpandAndLockInstance()
5751
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5752
    self.op.target_node = target_node
5753
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5754
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5755

    
5756
  def DeclareLocks(self, level):
5757
    if level == locking.LEVEL_NODE:
5758
      self._LockInstancesNodes(primary_only=True)
5759

    
5760
  def BuildHooksEnv(self):
5761
    """Build hooks env.
5762

5763
    This runs on master, primary and secondary nodes of the instance.
5764

5765
    """
5766
    env = {
5767
      "TARGET_NODE": self.op.target_node,
5768
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5769
      }
5770
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5771
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5772
                                       self.op.target_node]
5773
    return env, nl, nl
5774

    
5775
  def CheckPrereq(self):
5776
    """Check prerequisites.
5777

5778
    This checks that the instance is in the cluster.
5779

5780
    """
5781
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5782
    assert self.instance is not None, \
5783
      "Cannot retrieve locked instance %s" % self.op.instance_name
5784

    
5785
    node = self.cfg.GetNodeInfo(self.op.target_node)
5786
    assert node is not None, \
5787
      "Cannot retrieve locked node %s" % self.op.target_node
5788

    
5789
    self.target_node = target_node = node.name
5790

    
5791
    if target_node == instance.primary_node:
5792
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5793
                                 (instance.name, target_node),
5794
                                 errors.ECODE_STATE)
5795

    
5796
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5797

    
5798
    for idx, dsk in enumerate(instance.disks):
5799
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5800
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5801
                                   " cannot copy" % idx, errors.ECODE_STATE)
5802

    
5803
    _CheckNodeOnline(self, target_node)
5804
    _CheckNodeNotDrained(self, target_node)
5805
    _CheckNodeVmCapable(self, target_node)
5806

    
5807
    if instance.admin_up:
5808
      # check memory requirements on the target node
5809
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5810
                           instance.name, bep[constants.BE_MEMORY],
5811
                           instance.hypervisor)
5812
    else:
5813
      self.LogInfo("Not checking memory on the secondary node as"
5814
                   " instance will not be started")
5815

    
5816
    # check bridge existence
5817
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5818

    
5819
  def Exec(self, feedback_fn):
5820
    """Move an instance.
5821

5822
    The move is done by shutting it down on its present node, copying
5823
    the data over (slow) and starting it on the new node.
5824

5825
    """
5826
    instance = self.instance
5827

    
5828
    source_node = instance.primary_node
5829
    target_node = self.target_node
5830

    
5831
    self.LogInfo("Shutting down instance %s on source node %s",
5832
                 instance.name, source_node)
5833

    
5834
    result = self.rpc.call_instance_shutdown(source_node, instance,
5835
                                             self.op.shutdown_timeout)
5836
    msg = result.fail_msg
5837
    if msg:
5838
      if self.op.ignore_consistency:
5839
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5840
                             " Proceeding anyway. Please make sure node"
5841
                             " %s is down. Error details: %s",
5842
                             instance.name, source_node, source_node, msg)
5843
      else:
5844
        raise errors.OpExecError("Could not shutdown instance %s on"
5845
                                 " node %s: %s" %
5846
                                 (instance.name, source_node, msg))
5847

    
5848
    # create the target disks
5849
    try:
5850
      _CreateDisks(self, instance, target_node=target_node)
5851
    except errors.OpExecError:
5852
      self.LogWarning("Device creation failed, reverting...")
5853
      try:
5854
        _RemoveDisks(self, instance, target_node=target_node)
5855
      finally:
5856
        self.cfg.ReleaseDRBDMinors(instance.name)
5857
        raise
5858

    
5859
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5860

    
5861
    errs = []
5862
    # activate, get path, copy the data over
5863
    for idx, disk in enumerate(instance.disks):
5864
      self.LogInfo("Copying data for disk %d", idx)
5865
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5866
                                               instance.name, True)
5867
      if result.fail_msg:
5868
        self.LogWarning("Can't assemble newly created disk %d: %s",
5869
                        idx, result.fail_msg)
5870
        errs.append(result.fail_msg)
5871
        break
5872
      dev_path = result.payload
5873
      result = self.rpc.call_blockdev_export(source_node, disk,
5874
                                             target_node, dev_path,
5875
                                             cluster_name)
5876
      if result.fail_msg:
5877
        self.LogWarning("Can't copy data over for disk %d: %s",
5878
                        idx, result.fail_msg)
5879
        errs.append(result.fail_msg)
5880
        break
5881

    
5882
    if errs:
5883
      self.LogWarning("Some disks failed to copy, aborting")
5884
      try:
5885
        _RemoveDisks(self, instance, target_node=target_node)
5886
      finally:
5887
        self.cfg.ReleaseDRBDMinors(instance.name)
5888
        raise errors.OpExecError("Errors during disk copy: %s" %
5889
                                 (",".join(errs),))
5890

    
5891
    instance.primary_node = target_node
5892
    self.cfg.Update(instance, feedback_fn)
5893

    
5894
    self.LogInfo("Removing the disks on the original node")
5895
    _RemoveDisks(self, instance, target_node=source_node)
5896

    
5897
    # Only start the instance if it's marked as up
5898
    if instance.admin_up:
5899
      self.LogInfo("Starting instance %s on node %s",
5900
                   instance.name, target_node)
5901

    
5902
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5903
                                           ignore_secondaries=True)
5904
      if not disks_ok:
5905
        _ShutdownInstanceDisks(self, instance)
5906
        raise errors.OpExecError("Can't activate the instance's disks")
5907

    
5908
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5909
      msg = result.fail_msg
5910
      if msg:
5911
        _ShutdownInstanceDisks(self, instance)
5912
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5913
                                 (instance.name, target_node, msg))
5914

    
5915

    
5916
class LUMigrateNode(LogicalUnit):
5917
  """Migrate all instances from a node.
5918

5919
  """
5920
  HPATH = "node-migrate"
5921
  HTYPE = constants.HTYPE_NODE
5922
  REQ_BGL = False
5923

    
5924
  def ExpandNames(self):
5925
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5926

    
5927
    self.needed_locks = {
5928
      locking.LEVEL_NODE: [self.op.node_name],
5929
      }
5930

    
5931
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5932

    
5933
    # Create tasklets for migrating instances for all instances on this node
5934
    names = []
5935
    tasklets = []
5936

    
5937
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5938
      logging.debug("Migrating instance %s", inst.name)
5939
      names.append(inst.name)
5940

    
5941
      tasklets.append(TLMigrateInstance(self, inst.name, False))
5942

    
5943
    self.tasklets = tasklets
5944

    
5945
    # Declare instance locks
5946
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5947

    
5948
  def DeclareLocks(self, level):
5949
    if level == locking.LEVEL_NODE:
5950
      self._LockInstancesNodes()
5951

    
5952
  def BuildHooksEnv(self):
5953
    """Build hooks env.
5954

5955
    This runs on the master, the primary and all the secondaries.
5956

5957
    """
5958
    env = {
5959
      "NODE_NAME": self.op.node_name,
5960
      }
5961

    
5962
    nl = [self.cfg.GetMasterNode()]
5963

    
5964
    return (env, nl, nl)
5965

    
5966

    
5967
class TLMigrateInstance(Tasklet):
5968
  """Tasklet class for instance migration.
5969

5970
  @type live: boolean
5971
  @ivar live: whether the migration will be done live or non-live;
5972
      this variable is initialized only after CheckPrereq has run
5973

5974
  """
5975
  def __init__(self, lu, instance_name, cleanup):
5976
    """Initializes this class.
5977

5978
    """
5979
    Tasklet.__init__(self, lu)
5980

    
5981
    # Parameters
5982
    self.instance_name = instance_name
5983
    self.cleanup = cleanup
5984
    self.live = False # will be overridden later
5985

    
5986
  def CheckPrereq(self):
5987
    """Check prerequisites.
5988

5989
    This checks that the instance is in the cluster.
5990

5991
    """
5992
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5993
    instance = self.cfg.GetInstanceInfo(instance_name)
5994
    assert instance is not None
5995

    
5996
    if instance.disk_template != constants.DT_DRBD8:
5997
      raise errors.OpPrereqError("Instance's disk layout is not"
5998
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5999

    
6000
    secondary_nodes = instance.secondary_nodes
6001
    if not secondary_nodes:
6002
      raise errors.ConfigurationError("No secondary node but using"
6003
                                      " drbd8 disk template")
6004

    
6005
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6006

    
6007
    target_node = secondary_nodes[0]
6008
    # check memory requirements on the secondary node
6009
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6010
                         instance.name, i_be[constants.BE_MEMORY],
6011
                         instance.hypervisor)
6012

    
6013
    # check bridge existence
6014
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6015

    
6016
    if not self.cleanup:
6017
      _CheckNodeNotDrained(self.lu, target_node)
6018
      result = self.rpc.call_instance_migratable(instance.primary_node,
6019
                                                 instance)
6020
      result.Raise("Can't migrate, please use failover",
6021
                   prereq=True, ecode=errors.ECODE_STATE)
6022

    
6023
    self.instance = instance
6024

    
6025
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6026
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6027
                                 " parameters are accepted",
6028
                                 errors.ECODE_INVAL)
6029
    if self.lu.op.live is not None:
6030
      if self.lu.op.live:
6031
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6032
      else:
6033
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6034
      # reset the 'live' parameter to None so that repeated
6035
      # invocations of CheckPrereq do not raise an exception
6036
      self.lu.op.live = None
6037
    elif self.lu.op.mode is None:
6038
      # read the default value from the hypervisor
6039
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6040
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6041

    
6042
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
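    # In short: an explicit 'mode' always wins; the legacy boolean 'live' is
    # translated into HT_MIGRATION_LIVE/HT_MIGRATION_NONLIVE and then cleared
    # so that repeated CheckPrereq runs stay idempotent; if neither was given,
    # the hypervisor's HV_MIGRATION_MODE default decides.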
6043

    
6044
  def _WaitUntilSync(self):
6045
    """Poll with custom rpc for disk sync.
6046

6047
    This uses our own step-based rpc call.
6048

6049
    """
6050
    self.feedback_fn("* wait until resync is done")
6051
    all_done = False
6052
    while not all_done:
6053
      all_done = True
6054
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6055
                                            self.nodes_ip,
6056
                                            self.instance.disks)
6057
      min_percent = 100
6058
      for node, nres in result.items():
6059
        nres.Raise("Cannot resync disks on node %s" % node)
6060
        node_done, node_percent = nres.payload
6061
        all_done = all_done and node_done
6062
        if node_percent is not None:
6063
          min_percent = min(min_percent, node_percent)
6064
      if not all_done:
6065
        if min_percent < 100:
6066
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6067
        time.sleep(2)
6068

    
6069
  def _EnsureSecondary(self, node):
6070
    """Demote a node to secondary.
6071

6072
    """
6073
    self.feedback_fn("* switching node %s to secondary mode" % node)
6074

    
6075
    for dev in self.instance.disks:
6076
      self.cfg.SetDiskID(dev, node)
6077

    
6078
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6079
                                          self.instance.disks)
6080
    result.Raise("Cannot change disk to secondary on node %s" % node)
6081

    
6082
  def _GoStandalone(self):
6083
    """Disconnect from the network.
6084

6085
    """
6086
    self.feedback_fn("* changing into standalone mode")
6087
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6088
                                               self.instance.disks)
6089
    for node, nres in result.items():
6090
      nres.Raise("Cannot disconnect disks node %s" % node)
6091

    
6092
  def _GoReconnect(self, multimaster):
6093
    """Reconnect to the network.
6094

6095
    """
6096
    if multimaster:
6097
      msg = "dual-master"
6098
    else:
6099
      msg = "single-master"
6100
    self.feedback_fn("* changing disks into %s mode" % msg)
6101
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6102
                                           self.instance.disks,
6103
                                           self.instance.name, multimaster)
6104
    for node, nres in result.items():
6105
      nres.Raise("Cannot change disks config on node %s" % node)
6106

    
6107
  def _ExecCleanup(self):
6108
    """Try to cleanup after a failed migration.
6109

6110
    The cleanup is done by:
6111
      - check that the instance is running only on one node
6112
        (and update the config if needed)
6113
      - change disks on its secondary node to secondary
6114
      - wait until disks are fully synchronized
6115
      - disconnect from the network
6116
      - change disks into single-master mode
6117
      - wait again until disks are fully synchronized
6118

6119
    """
6120
    instance = self.instance
6121
    target_node = self.target_node
6122
    source_node = self.source_node
6123

    
6124
    # check running on only one node
6125
    self.feedback_fn("* checking where the instance actually runs"
6126
                     " (if this hangs, the hypervisor might be in"
6127
                     " a bad state)")
6128
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6129
    for node, result in ins_l.items():
6130
      result.Raise("Can't contact node %s" % node)
6131

    
6132
    runningon_source = instance.name in ins_l[source_node].payload
6133
    runningon_target = instance.name in ins_l[target_node].payload
6134

    
6135
    if runningon_source and runningon_target:
6136
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6137
                               " or the hypervisor is confused. You will have"
6138
                               " to ensure manually that it runs only on one"
6139
                               " and restart this operation.")
6140

    
6141
    if not (runningon_source or runningon_target):
6142
      raise errors.OpExecError("Instance does not seem to be running at all."
6143
                               " In this case, it's safer to repair by"
6144
                               " running 'gnt-instance stop' to ensure disk"
6145
                               " shutdown, and then restarting it.")
6146

    
6147
    if runningon_target:
6148
      # the migration has actually succeeded, we need to update the config
6149
      self.feedback_fn("* instance running on secondary node (%s),"
6150
                       " updating config" % target_node)
6151
      instance.primary_node = target_node
6152
      self.cfg.Update(instance, self.feedback_fn)
6153
      demoted_node = source_node
6154
    else:
6155
      self.feedback_fn("* instance confirmed to be running on its"
6156
                       " primary node (%s)" % source_node)
6157
      demoted_node = target_node
6158

    
6159
    self._EnsureSecondary(demoted_node)
6160
    try:
6161
      self._WaitUntilSync()
6162
    except errors.OpExecError:
6163
      # we ignore here errors, since if the device is standalone, it
6164
      # won't be able to sync
6165
      pass
6166
    self._GoStandalone()
6167
    self._GoReconnect(False)
6168
    self._WaitUntilSync()
6169

    
6170
    self.feedback_fn("* done")
6171

    
6172
  def _RevertDiskStatus(self):
6173
    """Try to revert the disk status after a failed migration.
6174

6175
    """
6176
    target_node = self.target_node
6177
    try:
6178
      self._EnsureSecondary(target_node)
6179
      self._GoStandalone()
6180
      self._GoReconnect(False)
6181
      self._WaitUntilSync()
6182
    except errors.OpExecError, err:
6183
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6184
                         " drives: error '%s'\n"
6185
                         "Please look and recover the instance status" %
6186
                         str(err))
6187

    
6188
  def _AbortMigration(self):
6189
    """Call the hypervisor code to abort a started migration.
6190

6191
    """
6192
    instance = self.instance
6193
    target_node = self.target_node
6194
    migration_info = self.migration_info
6195

    
6196
    abort_result = self.rpc.call_finalize_migration(target_node,
6197
                                                    instance,
6198
                                                    migration_info,
6199
                                                    False)
6200
    abort_msg = abort_result.fail_msg
6201
    if abort_msg:
6202
      logging.error("Aborting migration failed on target node %s: %s",
6203
                    target_node, abort_msg)
6204
      # Don't raise an exception here, as we stil have to try to revert the
6205
      # disk status, even if this step failed.
6206

    
6207
  def _ExecMigration(self):
6208
    """Migrate an instance.
6209

6210
    The migration is done by:
6211
      - change the disks into dual-master mode
6212
      - wait until disks are fully synchronized again
6213
      - migrate the instance
6214
      - change disks on the new secondary node (the old primary) to secondary
6215
      - wait until disks are fully synchronized
6216
      - change disks into single-master mode
6217

6218
    """
6219
    instance = self.instance
6220
    target_node = self.target_node
6221
    source_node = self.source_node
6222

    
6223
    self.feedback_fn("* checking disk consistency between source and target")
6224
    for dev in instance.disks:
6225
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6226
        raise errors.OpExecError("Disk %s is degraded or not fully"
6227
                                 " synchronized on target node,"
6228
                                 " aborting migrate." % dev.iv_name)
6229

    
6230
    # First get the migration information from the remote node
6231
    result = self.rpc.call_migration_info(source_node, instance)
6232
    msg = result.fail_msg
6233
    if msg:
6234
      log_err = ("Failed fetching source migration information from %s: %s" %
6235
                 (source_node, msg))
6236
      logging.error(log_err)
6237
      raise errors.OpExecError(log_err)
6238

    
6239
    self.migration_info = migration_info = result.payload
6240

    
6241
    # Then switch the disks to master/master mode
6242
    self._EnsureSecondary(target_node)
6243
    self._GoStandalone()
6244
    self._GoReconnect(True)
6245
    self._WaitUntilSync()
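    # At this point both DRBD peers are connected in dual-primary
    # ("multimaster") mode and fully synced, so the target node can open the
    # disks while the source node still runs the instance, which the
    # migration below requires.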
6246

    
6247
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6248
    result = self.rpc.call_accept_instance(target_node,
6249
                                           instance,
6250
                                           migration_info,
6251
                                           self.nodes_ip[target_node])
6252

    
6253
    msg = result.fail_msg
6254
    if msg:
6255
      logging.error("Instance pre-migration failed, trying to revert"
6256
                    " disk status: %s", msg)
6257
      self.feedback_fn("Pre-migration failed, aborting")
6258
      self._AbortMigration()
6259
      self._RevertDiskStatus()
6260
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6261
                               (instance.name, msg))
6262

    
6263
    self.feedback_fn("* migrating instance to %s" % target_node)
6264
6265
    result = self.rpc.call_instance_migrate(source_node, instance,
6266
                                            self.nodes_ip[target_node],
6267
                                            self.live)
6268
    msg = result.fail_msg
6269
    if msg:
6270
      logging.error("Instance migration failed, trying to revert"
6271
                    " disk status: %s", msg)
6272
      self.feedback_fn("Migration failed, aborting")
6273
      self._AbortMigration()
6274
      self._RevertDiskStatus()
6275
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6276
                               (instance.name, msg))
6277
6278

    
6279
    instance.primary_node = target_node
6280
    # distribute new instance config to the other nodes
6281
    self.cfg.Update(instance, self.feedback_fn)
6282

    
6283
    result = self.rpc.call_finalize_migration(target_node,
6284
                                              instance,
6285
                                              migration_info,
6286
                                              True)
6287
    msg = result.fail_msg
6288
    if msg:
6289
      logging.error("Instance migration succeeded, but finalization failed:"
6290
                    " %s", msg)
6291
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6292
                               msg)
6293

    
6294
    self._EnsureSecondary(source_node)
6295
    self._WaitUntilSync()
6296
    self._GoStandalone()
6297
    self._GoReconnect(False)
6298
    self._WaitUntilSync()
6299

    
6300
    self.feedback_fn("* done")
6301

    
6302
  def Exec(self, feedback_fn):
6303
    """Perform the migration.
6304

6305
    """
6306
    feedback_fn("Migrating instance %s" % self.instance.name)
6307

    
6308
    self.feedback_fn = feedback_fn
6309

    
6310
    self.source_node = self.instance.primary_node
6311
    self.target_node = self.instance.secondary_nodes[0]
6312
    self.all_nodes = [self.source_node, self.target_node]
6313
    self.nodes_ip = {
6314
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6315
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6316
      }
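    # nodes_ip maps each node name to its secondary IP; the DRBD disconnect,
    # attach and migration RPCs issued by the helpers above all address the
    # peer nodes over the replication (secondary) network.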
6317

    
6318
    if self.cleanup:
6319
      return self._ExecCleanup()
6320
    else:
6321
      return self._ExecMigration()
6322

    
6323

    
6324
def _CreateBlockDev(lu, node, instance, device, force_create,
6325
                    info, force_open):
6326
  """Create a tree of block devices on a given node.
6327

6328
  If this device type has to be created on secondaries, create it and
6329
  all its children.
6330

6331
  If not, just recurse to children keeping the same 'force' value.
6332

6333
  @param lu: the lu on whose behalf we execute
6334
  @param node: the node on which to create the device
6335
  @type instance: L{objects.Instance}
6336
  @param instance: the instance which owns the device
6337
  @type device: L{objects.Disk}
6338
  @param device: the device to create
6339
  @type force_create: boolean
6340
  @param force_create: whether to force creation of this device; this
6341
      will be changed to True whenever we find a device which has
6342
      CreateOnSecondary() attribute
6343
  @param info: the extra 'metadata' we should attach to the device
6344
      (this will be represented as an LVM tag)
6345
  @type force_open: boolean
6346
  @param force_open: this parameter will be passed to the
6347
      L{backend.BlockdevCreate} function where it specifies
6348
      whether we run on primary or not, and it affects both
6349
      the child assembly and the device's own Open() execution
6350

6351
  """
6352
  if device.CreateOnSecondary():
6353
    force_create = True
6354

    
6355
  if device.children:
6356
    for child in device.children:
6357
      _CreateBlockDev(lu, node, instance, child, force_create,
6358
                      info, force_open)
6359

    
6360
  if not force_create:
6361
    return
6362

    
6363
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6364

    
6365

    
6366
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6367
  """Create a single block device on a given node.
6368

6369
  This will not recurse over children of the device, so they must be
6370
  created in advance.
6371

6372
  @param lu: the lu on whose behalf we execute
6373
  @param node: the node on which to create the device
6374
  @type instance: L{objects.Instance}
6375
  @param instance: the instance which owns the device
6376
  @type device: L{objects.Disk}
6377
  @param device: the device to create
6378
  @param info: the extra 'metadata' we should attach to the device
6379
      (this will be represented as an LVM tag)
6380
  @type force_open: boolean
6381
  @param force_open: this parameter will be passed to the
6382
      L{backend.BlockdevCreate} function where it specifies
6383
      whether we run on primary or not, and it affects both
6384
      the child assembly and the device's own Open() execution
6385

6386
  """
6387
  lu.cfg.SetDiskID(device, node)
6388
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6389
                                       instance.name, force_open, info)
6390
  result.Raise("Can't create block device %s on"
6391
               " node %s for instance %s" % (device, node, instance.name))
6392
  if device.physical_id is None:
6393
    device.physical_id = result.payload
6394

    
6395

    
6396
def _GenerateUniqueNames(lu, exts):
6397
  """Generate a suitable LV name.
6398

6399
  This will generate a logical volume name for the given instance.
6400

6401
  """
6402
  results = []
6403
  for val in exts:
6404
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6405
    results.append("%s%s" % (new_id, val))
6406
  return results
6407

    
6408

    
6409
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6410
                         p_minor, s_minor):
6411
  """Generate a drbd8 device complete with its children.
6412

6413
  """
6414
  port = lu.cfg.AllocatePort()
6415
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6416
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6417
                          logical_id=(vgname, names[0]))
6418
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6419
                          logical_id=(vgname, names[1]))
6420
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6421
                          logical_id=(primary, secondary, port,
6422
                                      p_minor, s_minor,
6423
                                      shared_secret),
6424
                          children=[dev_data, dev_meta],
6425
                          iv_name=iv_name)
6426
  return drbd_dev
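  # The result is a small disk tree: an LD_DRBD8 device whose logical_id packs
  # (primary, secondary, port, p_minor, s_minor, shared_secret) and whose two
  # LD_LV children are the data volume (full size) and a fixed 128 MB DRBD
  # metadata volume, both allocated from the given volume group.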
6427

    
6428

    
6429
def _GenerateDiskTemplate(lu, template_name,
6430
                          instance_name, primary_node,
6431
                          secondary_nodes, disk_info,
6432
                          file_storage_dir, file_driver,
6433
                          base_index, feedback_fn):
6434
  """Generate the entire disk layout for a given template type.
6435

6436
  """
6437
  #TODO: compute space requirements
6438

    
6439
  vgname = lu.cfg.GetVGName()
6440
  disk_count = len(disk_info)
6441
  disks = []
6442
  if template_name == constants.DT_DISKLESS:
6443
    pass
6444
  elif template_name == constants.DT_PLAIN:
6445
    if len(secondary_nodes) != 0:
6446
      raise errors.ProgrammerError("Wrong template configuration")
6447

    
6448
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6449
                                      for i in range(disk_count)])
6450
    for idx, disk in enumerate(disk_info):
6451
      disk_index = idx + base_index
6452
      vg = disk.get("vg", vgname)
6453
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6454
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6455
                              logical_id=(vg, names[idx]),
6456
                              iv_name="disk/%d" % disk_index,
6457
                              mode=disk["mode"])
6458
      disks.append(disk_dev)
6459
  elif template_name == constants.DT_DRBD8:
6460
    if len(secondary_nodes) != 1:
6461
      raise errors.ProgrammerError("Wrong template configuration")
6462
    remote_node = secondary_nodes[0]
6463
    minors = lu.cfg.AllocateDRBDMinor(
6464
      [primary_node, remote_node] * len(disk_info), instance_name)
6465

    
6466
    names = []
6467
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6468
                                               for i in range(disk_count)]):
6469
      names.append(lv_prefix + "_data")
6470
      names.append(lv_prefix + "_meta")
6471
    for idx, disk in enumerate(disk_info):
6472
      disk_index = idx + base_index
6473
      vg = disk.get("vg", vgname)
6474
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6475
                                      disk["size"], vg, names[idx*2:idx*2+2],
6476
                                      "disk/%d" % disk_index,
6477
                                      minors[idx*2], minors[idx*2+1])
6478
      disk_dev.mode = disk["mode"]
6479
      disks.append(disk_dev)
6480
  elif template_name == constants.DT_FILE:
6481
    if len(secondary_nodes) != 0:
6482
      raise errors.ProgrammerError("Wrong template configuration")
6483

    
6484
    opcodes.RequireFileStorage()
6485

    
6486
    for idx, disk in enumerate(disk_info):
6487
      disk_index = idx + base_index
6488
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6489
                              iv_name="disk/%d" % disk_index,
6490
                              logical_id=(file_driver,
6491
                                          "%s/disk%d" % (file_storage_dir,
6492
                                                         disk_index)),
6493
                              mode=disk["mode"])
6494
      disks.append(disk_dev)
6495
  else:
6496
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6497
  return disks
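  # Note on the DRBD8 branch above: the generated names are consumed pairwise
  # ("<unique-id>.diskN_data", "<unique-id>.diskN_meta") and AllocateDRBDMinor
  # returns two minors per disk, one for the primary and one for the remote
  # node, which is why both lists are indexed with idx*2 and idx*2+1.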
6498

    
6499

    
6500
def _GetInstanceInfoText(instance):
6501
  """Compute that text that should be added to the disk's metadata.
6502

6503
  """
6504
  return "originstname+%s" % instance.name
6505

    
6506

    
6507
def _CalcEta(time_taken, written, total_size):
6508
  """Calculates the ETA based on size written and total size.
6509

6510
  @param time_taken: The time taken so far
6511
  @param written: amount written so far
6512
  @param total_size: The total size of data to be written
6513
  @return: The remaining time in seconds
6514

6515
  """
6516
  avg_time = time_taken / float(written)
6517
  return (total_size - written) * avg_time
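  # Illustrative example (not from the original source): the estimate is a
  # plain linear extrapolation of the rate observed so far, e.g.
  # _CalcEta(30.0, 1024, 4096) == (4096 - 1024) * (30.0 / 1024) == 90.0,
  # i.e. roughly another 90 seconds at the current throughput.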
6518

    
6519

    
6520
def _WipeDisks(lu, instance):
6521
  """Wipes instance disks.
6522

6523
  @type lu: L{LogicalUnit}
6524
  @param lu: the logical unit on whose behalf we execute
6525
  @type instance: L{objects.Instance}
6526
  @param instance: the instance whose disks we should wipe
6527
  @return: the success of the wipe
6528

6529
  """
6530
  node = instance.primary_node
6531
  logging.info("Pause sync of instance %s disks", instance.name)
6532
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6533

    
6534
  for idx, success in enumerate(result.payload):
6535
    if not success:
6536
      logging.warn("pause-sync of instance %s for disks %d failed",
6537
                   instance.name, idx)
6538

    
6539
  try:
6540
    for idx, device in enumerate(instance.disks):
6541
      lu.LogInfo("* Wiping disk %d", idx)
6542
      logging.info("Wiping disk %d for instance %s", idx, instance.name)
6543

    
6544
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6545
      # MAX_WIPE_CHUNK at max
6546
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6547
                            constants.MIN_WIPE_CHUNK_PERCENT)
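      # For example, with hypothetical values MAX_WIPE_CHUNK = 1024 and
      # MIN_WIPE_CHUNK_PERCENT = 10, a 20480 MB disk would be wiped in
      # min(1024, 2048.0) == 1024 MB chunks, while a 4096 MB disk would use
      # min(1024, 409.6) == 409.6 MB chunks.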
6548

    
6549
      offset = 0
6550
      size = device.size
6551
      last_output = 0
6552
      start_time = time.time()
6553

    
6554
      while offset < size:
6555
        wipe_size = min(wipe_chunk_size, size - offset)
6556
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6557
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
6558
                     (idx, offset, wipe_size))
6559
        now = time.time()
6560
        offset += wipe_size
6561
        if now - last_output >= 60:
6562
          eta = _CalcEta(now - start_time, offset, size)
6563
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
6564
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
6565
          last_output = now
6566
  finally:
6567
    logging.info("Resume sync of instance %s disks", instance.name)
6568

    
6569
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6570

    
6571
    for idx, success in enumerate(result.payload):
6572
      if not success:
6573
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6574
                      " look at the status and troubleshoot the issue.", idx)
6575
        logging.warn("resume-sync of instance %s for disks %d failed",
6576
                     instance.name, idx)
6577

    
6578

    
6579
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6580
  """Create all disks for an instance.
6581

6582
  This abstracts away some work from AddInstance.
6583

6584
  @type lu: L{LogicalUnit}
6585
  @param lu: the logical unit on whose behalf we execute
6586
  @type instance: L{objects.Instance}
6587
  @param instance: the instance whose disks we should create
6588
  @type to_skip: list
6589
  @param to_skip: list of indices to skip
6590
  @type target_node: string
6591
  @param target_node: if passed, overrides the target node for creation
6592
  @rtype: boolean
6593
  @return: the success of the creation
6594

6595
  """
6596
  info = _GetInstanceInfoText(instance)
6597
  if target_node is None:
6598
    pnode = instance.primary_node
6599
    all_nodes = instance.all_nodes
6600
  else:
6601
    pnode = target_node
6602
    all_nodes = [pnode]
6603

    
6604
  if instance.disk_template == constants.DT_FILE:
6605
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6606
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6607

    
6608
    result.Raise("Failed to create directory '%s' on"
6609
                 " node %s" % (file_storage_dir, pnode))
6610

    
6611
  # Note: this needs to be kept in sync with adding of disks in
6612
  # LUSetInstanceParams
6613
  for idx, device in enumerate(instance.disks):
6614
    if to_skip and idx in to_skip:
6615
      continue
6616
    logging.info("Creating volume %s for instance %s",
6617
                 device.iv_name, instance.name)
6618
    #HARDCODE
6619
    for node in all_nodes:
6620
      f_create = node == pnode
6621
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6622

    
6623

    
6624
def _RemoveDisks(lu, instance, target_node=None):
6625
  """Remove all disks for an instance.
6626

6627
  This abstracts away some work from `AddInstance()` and
6628
  `RemoveInstance()`. Note that in case some of the devices couldn't
6629
  be removed, the removal will continue with the other ones (compare
6630
  with `_CreateDisks()`).
6631

6632
  @type lu: L{LogicalUnit}
6633
  @param lu: the logical unit on whose behalf we execute
6634
  @type instance: L{objects.Instance}
6635
  @param instance: the instance whose disks we should remove
6636
  @type target_node: string
6637
  @param target_node: used to override the node on which to remove the disks
6638
  @rtype: boolean
6639
  @return: the success of the removal
6640

6641
  """
6642
  logging.info("Removing block devices for instance %s", instance.name)
6643

    
6644
  all_result = True
6645
  for device in instance.disks:
6646
    if target_node:
6647
      edata = [(target_node, device)]
6648
    else:
6649
      edata = device.ComputeNodeTree(instance.primary_node)
6650
    for node, disk in edata:
6651
      lu.cfg.SetDiskID(disk, node)
6652
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6653
      if msg:
6654
        lu.LogWarning("Could not remove block device %s on node %s,"
6655
                      " continuing anyway: %s", device.iv_name, node, msg)
6656
        all_result = False
6657

    
6658
  if instance.disk_template == constants.DT_FILE:
6659
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6660
    if target_node:
6661
      tgt = target_node
6662
    else:
6663
      tgt = instance.primary_node
6664
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6665
    if result.fail_msg:
6666
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6667
                    file_storage_dir, instance.primary_node, result.fail_msg)
6668
      all_result = False
6669

    
6670
  return all_result
6671

    
6672

    
6673
def _ComputeDiskSizePerVG(disk_template, disks):
6674
  """Compute disk size requirements in the volume group
6675

6676
  """
6677
  def _compute(disks, payload):
6678
    """Universal algorithm
6679

6680
    """
6681
    vgs = {}
6682
    for disk in disks:
6683
      vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6684

    
6685
    return vgs
6686

    
6687
  # Required free disk space as a function of disk and swap space
6688
  req_size_dict = {
6689
    constants.DT_DISKLESS: None,
6690
    constants.DT_PLAIN: _compute(disks, 0),
6691
    # 128 MB are added for drbd metadata for each disk
6692
    constants.DT_DRBD8: _compute(disks, 128),
6693
    constants.DT_FILE: None,
6694
  }
6695

    
6696
  if disk_template not in req_size_dict:
6697
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6698
                                 " is unknown" %  disk_template)
6699

    
6700
  return req_size_dict[disk_template]
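  # Worked example (illustrative only): for DT_DRBD8 and
  # disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}]
  # the helper accumulates 1024 + 128 + 2048 + 128 and returns
  # {"xenvg": 3328}, i.e. each disk plus 128 MB of DRBD metadata.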
6701

    
6702

    
6703
def _ComputeDiskSize(disk_template, disks):
6704
  """Compute disk size requirements in the volume group
6705

6706
  """
6707
  # Required free disk space as a function of disk and swap space
6708
  req_size_dict = {
6709
    constants.DT_DISKLESS: None,
6710
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6711
    # 128 MB are added for drbd metadata for each disk
6712
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6713
    constants.DT_FILE: None,
6714
  }
6715

    
6716
  if disk_template not in req_size_dict:
6717
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6718
                                 " is unknown" %  disk_template)
6719

    
6720
  return req_size_dict[disk_template]
6721

    
6722

    
6723
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6724
  """Hypervisor parameter validation.
6725

6726
  This function abstracts the hypervisor parameter validation to be
6727
  used in both instance create and instance modify.
6728

6729
  @type lu: L{LogicalUnit}
6730
  @param lu: the logical unit for which we check
6731
  @type nodenames: list
6732
  @param nodenames: the list of nodes on which we should check
6733
  @type hvname: string
6734
  @param hvname: the name of the hypervisor we should use
6735
  @type hvparams: dict
6736
  @param hvparams: the parameters which we need to check
6737
  @raise errors.OpPrereqError: if the parameters are not valid
6738

6739
  """
6740
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6741
                                                  hvname,
6742
                                                  hvparams)
6743
  for node in nodenames:
6744
    info = hvinfo[node]
6745
    if info.offline:
6746
      continue
6747
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6748

    
6749

    
6750
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6751
  """OS parameters validation.
6752

6753
  @type lu: L{LogicalUnit}
6754
  @param lu: the logical unit for which we check
6755
  @type required: boolean
6756
  @param required: whether the validation should fail if the OS is not
6757
      found
6758
  @type nodenames: list
6759
  @param nodenames: the list of nodes on which we should check
6760
  @type osname: string
6761
  @param osname: the name of the hypervisor we should use
6762
  @type osparams: dict
6763
  @param osparams: the parameters which we need to check
6764
  @raise errors.OpPrereqError: if the parameters are not valid
6765

6766
  """
6767
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6768
                                   [constants.OS_VALIDATE_PARAMETERS],
6769
                                   osparams)
6770
  for node, nres in result.items():
6771
    # we don't check for offline cases since this should be run only
6772
    # against the master node and/or an instance's nodes
6773
    nres.Raise("OS Parameters validation failed on node %s" % node)
6774
    if not nres.payload:
6775
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6776
                 osname, node)
6777

    
6778

    
6779
class LUCreateInstance(LogicalUnit):
6780
  """Create an instance.
6781

6782
  """
6783
  HPATH = "instance-add"
6784
  HTYPE = constants.HTYPE_INSTANCE
6785
  REQ_BGL = False
6786

    
6787
  def CheckArguments(self):
6788
    """Check arguments.
6789

6790
    """
6791
    # do not require name_check to ease forward/backward compatibility
6792
    # for tools
6793
    if self.op.no_install and self.op.start:
6794
      self.LogInfo("No-installation mode selected, disabling startup")
6795
      self.op.start = False
6796
    # validate/normalize the instance name
6797
    self.op.instance_name = \
6798
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6799

    
6800
    if self.op.ip_check and not self.op.name_check:
6801
      # TODO: make the ip check more flexible and not depend on the name check
6802
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6803
                                 errors.ECODE_INVAL)
6804

    
6805
    # check nics' parameter names
6806
    for nic in self.op.nics:
6807
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6808

    
6809
    # check disks. parameter names and consistent adopt/no-adopt strategy
6810
    has_adopt = has_no_adopt = False
6811
    for disk in self.op.disks:
6812
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6813
      if "adopt" in disk:
6814
        has_adopt = True
6815
      else:
6816
        has_no_adopt = True
6817
    if has_adopt and has_no_adopt:
6818
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6819
                                 errors.ECODE_INVAL)
6820
    if has_adopt:
6821
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6822
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6823
                                   " '%s' disk template" %
6824
                                   self.op.disk_template,
6825
                                   errors.ECODE_INVAL)
6826
      if self.op.iallocator is not None:
6827
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6828
                                   " iallocator script", errors.ECODE_INVAL)
6829
      if self.op.mode == constants.INSTANCE_IMPORT:
6830
        raise errors.OpPrereqError("Disk adoption not allowed for"
6831
                                   " instance import", errors.ECODE_INVAL)
6832

    
6833
    self.adopt_disks = has_adopt
6834

    
6835
    # instance name verification
6836
    if self.op.name_check:
6837
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6838
      self.op.instance_name = self.hostname1.name
6839
      # used in CheckPrereq for ip ping check
6840
      self.check_ip = self.hostname1.ip
6841
    else:
6842
      self.check_ip = None
6843

    
6844
    # file storage checks
6845
    if (self.op.file_driver and
6846
        not self.op.file_driver in constants.FILE_DRIVER):
6847
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6848
                                 self.op.file_driver, errors.ECODE_INVAL)
6849

    
6850
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6851
      raise errors.OpPrereqError("File storage directory path not absolute",
6852
                                 errors.ECODE_INVAL)
6853

    
6854
    ### Node/iallocator related checks
6855
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6856

    
6857
    if self.op.pnode is not None:
6858
      if self.op.disk_template in constants.DTS_NET_MIRROR:
6859
        if self.op.snode is None:
6860
          raise errors.OpPrereqError("The networked disk templates need"
6861
                                     " a mirror node", errors.ECODE_INVAL)
6862
      elif self.op.snode:
6863
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6864
                        " template")
6865
        self.op.snode = None
6866

    
6867
    self._cds = _GetClusterDomainSecret()
6868

    
6869
    if self.op.mode == constants.INSTANCE_IMPORT:
6870
      # On import force_variant must be True, because if we forced it at
6871
      # initial install, our only chance when importing it back is that it
6872
      # works again!
6873
      self.op.force_variant = True
6874

    
6875
      if self.op.no_install:
6876
        self.LogInfo("No-installation mode has no effect during import")
6877

    
6878
    elif self.op.mode == constants.INSTANCE_CREATE:
6879
      if self.op.os_type is None:
6880
        raise errors.OpPrereqError("No guest OS specified",
6881
                                   errors.ECODE_INVAL)
6882
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6883
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6884
                                   " installation" % self.op.os_type,
6885
                                   errors.ECODE_STATE)
6886
      if self.op.disk_template is None:
6887
        raise errors.OpPrereqError("No disk template specified",
6888
                                   errors.ECODE_INVAL)
6889

    
6890
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6891
      # Check handshake to ensure both clusters have the same domain secret
6892
      src_handshake = self.op.source_handshake
6893
      if not src_handshake:
6894
        raise errors.OpPrereqError("Missing source handshake",
6895
                                   errors.ECODE_INVAL)
6896

    
6897
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6898
                                                           src_handshake)
6899
      if errmsg:
6900
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6901
                                   errors.ECODE_INVAL)
6902

    
6903
      # Load and check source CA
6904
      self.source_x509_ca_pem = self.op.source_x509_ca
6905
      if not self.source_x509_ca_pem:
6906
        raise errors.OpPrereqError("Missing source X509 CA",
6907
                                   errors.ECODE_INVAL)
6908

    
6909
      try:
6910
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6911
                                                    self._cds)
6912
      except OpenSSL.crypto.Error, err:
6913
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6914
                                   (err, ), errors.ECODE_INVAL)
6915

    
6916
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6917
      if errcode is not None:
6918
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6919
                                   errors.ECODE_INVAL)
6920

    
6921
      self.source_x509_ca = cert
6922

    
6923
      src_instance_name = self.op.source_instance_name
6924
      if not src_instance_name:
6925
        raise errors.OpPrereqError("Missing source instance name",
6926
                                   errors.ECODE_INVAL)
6927

    
6928
      self.source_instance_name = \
6929
          netutils.GetHostname(name=src_instance_name).name
6930

    
6931
    else:
6932
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6933
                                 self.op.mode, errors.ECODE_INVAL)
6934

    
6935
  def ExpandNames(self):
6936
    """ExpandNames for CreateInstance.
6937

6938
    Figure out the right locks for instance creation.
6939

6940
    """
6941
    self.needed_locks = {}
6942

    
6943
    instance_name = self.op.instance_name
6944
    # this is just a preventive check, but someone might still add this
6945
    # instance in the meantime, and creation will fail at lock-add time
6946
    if instance_name in self.cfg.GetInstanceList():
6947
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6948
                                 instance_name, errors.ECODE_EXISTS)
6949

    
6950
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6951

    
6952
    if self.op.iallocator:
6953
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6954
    else:
6955
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6956
      nodelist = [self.op.pnode]
6957
      if self.op.snode is not None:
6958
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6959
        nodelist.append(self.op.snode)
6960
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6961

    
6962
    # in case of import lock the source node too
6963
    if self.op.mode == constants.INSTANCE_IMPORT:
6964
      src_node = self.op.src_node
6965
      src_path = self.op.src_path
6966

    
6967
      if src_path is None:
6968
        self.op.src_path = src_path = self.op.instance_name
6969

    
6970
      if src_node is None:
6971
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6972
        self.op.src_node = None
6973
        if os.path.isabs(src_path):
6974
          raise errors.OpPrereqError("Importing an instance from an absolute"
6975
                                     " path requires a source node option.",
6976
                                     errors.ECODE_INVAL)
6977
      else:
6978
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6979
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6980
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6981
        if not os.path.isabs(src_path):
6982
          self.op.src_path = src_path = \
6983
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6984

    
6985
  def _RunAllocator(self):
6986
    """Run the allocator based on input opcode.
6987

6988
    """
6989
    nics = [n.ToDict() for n in self.nics]
6990
    ial = IAllocator(self.cfg, self.rpc,
6991
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6992
                     name=self.op.instance_name,
6993
                     disk_template=self.op.disk_template,
6994
                     tags=[],
6995
                     os=self.op.os_type,
6996
                     vcpus=self.be_full[constants.BE_VCPUS],
6997
                     mem_size=self.be_full[constants.BE_MEMORY],
6998
                     disks=self.disks,
6999
                     nics=nics,
7000
                     hypervisor=self.op.hypervisor,
7001
                     )
7002

    
7003
    ial.Run(self.op.iallocator)
7004

    
7005
    if not ial.success:
7006
      raise errors.OpPrereqError("Can't compute nodes using"
7007
                                 " iallocator '%s': %s" %
7008
                                 (self.op.iallocator, ial.info),
7009
                                 errors.ECODE_NORES)
7010
    if len(ial.result) != ial.required_nodes:
7011
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7012
                                 " of nodes (%s), required %s" %
7013
                                 (self.op.iallocator, len(ial.result),
7014
                                  ial.required_nodes), errors.ECODE_FAULT)
7015
    self.op.pnode = ial.result[0]
7016
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7017
                 self.op.instance_name, self.op.iallocator,
7018
                 utils.CommaJoin(ial.result))
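    # for mirrored disk templates (e.g. DRBD) the allocator is asked for two
    # nodes and ial.result looks like [primary_node, secondary_node]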
7019
    if ial.required_nodes == 2:
7020
      self.op.snode = ial.result[1]
7021

    
7022
  def BuildHooksEnv(self):
7023
    """Build hooks env.
7024

7025
    This runs on master, primary and secondary nodes of the instance.
7026

7027
    """
7028
    env = {
7029
      "ADD_MODE": self.op.mode,
7030
      }
7031
    if self.op.mode == constants.INSTANCE_IMPORT:
7032
      env["SRC_NODE"] = self.op.src_node
7033
      env["SRC_PATH"] = self.op.src_path
7034
      env["SRC_IMAGES"] = self.src_images
7035

    
7036
    env.update(_BuildInstanceHookEnv(
7037
      name=self.op.instance_name,
7038
      primary_node=self.op.pnode,
7039
      secondary_nodes=self.secondaries,
7040
      status=self.op.start,
7041
      os_type=self.op.os_type,
7042
      memory=self.be_full[constants.BE_MEMORY],
7043
      vcpus=self.be_full[constants.BE_VCPUS],
7044
      nics=_NICListToTuple(self, self.nics),
7045
      disk_template=self.op.disk_template,
7046
      disks=[(d["size"], d["mode"]) for d in self.disks],
7047
      bep=self.be_full,
7048
      hvp=self.hv_full,
7049
      hypervisor_name=self.op.hypervisor,
7050
    ))
7051

    
7052
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7053
          self.secondaries)
7054
    return env, nl, nl
7055

    
7056
  def _ReadExportInfo(self):
7057
    """Reads the export information from disk.
7058

7059
    It will override the opcode source node and path with the actual
7060
    information, if these two were not specified before.
7061

7062
    @return: the export information
7063

7064
    """
7065
    assert self.op.mode == constants.INSTANCE_IMPORT
7066

    
7067
    src_node = self.op.src_node
7068
    src_path = self.op.src_path
7069

    
7070
    if src_node is None:
7071
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7072
      exp_list = self.rpc.call_export_list(locked_nodes)
7073
      found = False
7074
      for node in exp_list:
7075
        if exp_list[node].fail_msg:
7076
          continue
7077
        if src_path in exp_list[node].payload:
7078
          found = True
7079
          self.op.src_node = src_node = node
7080
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7081
                                                       src_path)
7082
          break
7083
      if not found:
7084
        raise errors.OpPrereqError("No export found for relative path %s" %
7085
                                    src_path, errors.ECODE_INVAL)
7086

    
7087
    _CheckNodeOnline(self, src_node)
7088
    result = self.rpc.call_export_info(src_node, src_path)
7089
    result.Raise("No export or invalid export found in dir %s" % src_path)
7090

    
7091
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7092
    if not export_info.has_section(constants.INISECT_EXP):
7093
      raise errors.ProgrammerError("Corrupted export config",
7094
                                   errors.ECODE_ENVIRON)
7095

    
7096
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7097
    if (int(ei_version) != constants.EXPORT_VERSION):
7098
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7099
                                 (ei_version, constants.EXPORT_VERSION),
7100
                                 errors.ECODE_ENVIRON)
7101
    return export_info
7102

    
7103
  def _ReadExportParams(self, einfo):
7104
    """Use export parameters as defaults.
7105

7106
    In case the opcode doesn't specify (as in override) some instance
7107
    parameters, then try to use them from the export information, if
7108
    that declares them.
7109

7110
    """
7111
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7112

    
7113
    if self.op.disk_template is None:
7114
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7115
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7116
                                          "disk_template")
7117
      else:
7118
        raise errors.OpPrereqError("No disk template specified and the export"
7119
                                   " is missing the disk_template information",
7120
                                   errors.ECODE_INVAL)
7121

    
7122
    if not self.op.disks:
7123
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7124
        disks = []
7125
        # TODO: import the disk iv_name too
7126
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7127
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7128
          disks.append({"size": disk_sz})
7129
        self.op.disks = disks
7130
      else:
7131
        raise errors.OpPrereqError("No disk info specified and the export"
7132
                                   " is missing the disk information",
7133
                                   errors.ECODE_INVAL)
7134

    
7135
    if (not self.op.nics and
7136
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7137
      nics = []
7138
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7139
        ndict = {}
7140
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7141
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7142
          ndict[name] = v
7143
        nics.append(ndict)
7144
      self.op.nics = nics
7145

    
7146
    if (self.op.hypervisor is None and
7147
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7148
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7149
    if einfo.has_section(constants.INISECT_HYP):
7150
      # use the export parameters but do not override the ones
7151
      # specified by the user
7152
      for name, value in einfo.items(constants.INISECT_HYP):
7153
        if name not in self.op.hvparams:
7154
          self.op.hvparams[name] = value
7155

    
7156
    if einfo.has_section(constants.INISECT_BEP):
7157
      # use the parameters, without overriding
7158
      for name, value in einfo.items(constants.INISECT_BEP):
7159
        if name not in self.op.beparams:
7160
          self.op.beparams[name] = value
7161
    else:
7162
      # try to read the parameters old style, from the main section
7163
      for name in constants.BES_PARAMETERS:
7164
        if (name not in self.op.beparams and
7165
            einfo.has_option(constants.INISECT_INS, name)):
7166
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7167

    
7168
    if einfo.has_section(constants.INISECT_OSP):
7169
      # use the parameters, without overriding
7170
      for name, value in einfo.items(constants.INISECT_OSP):
7171
        if name not in self.op.osparams:
7172
          self.op.osparams[name] = value
7173

    
7174
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def CheckPrereq(self):
7201
    """Check prerequisites.
7202

7203
    """
7204
    if self.op.mode == constants.INSTANCE_IMPORT:
7205
      export_info = self._ReadExportInfo()
7206
      self._ReadExportParams(export_info)
7207

    
7208
    if (not self.cfg.GetVGName() and
7209
        self.op.disk_template not in constants.DTS_NOT_LVM):
7210
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7211
                                 " instances", errors.ECODE_STATE)
7212

    
7213
    if self.op.hypervisor is None:
7214
      self.op.hypervisor = self.cfg.GetHypervisorType()
7215

    
7216
    cluster = self.cfg.GetClusterInfo()
7217
    enabled_hvs = cluster.enabled_hypervisors
7218
    if self.op.hypervisor not in enabled_hvs:
7219
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7220
                                 " cluster (%s)" % (self.op.hypervisor,
7221
                                  ",".join(enabled_hvs)),
7222
                                 errors.ECODE_STATE)
7223

    
7224
    # check hypervisor parameter syntax (locally)
7225
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7226
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7227
                                      self.op.hvparams)
7228
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7229
    hv_type.CheckParameterSyntax(filled_hvp)
7230
    self.hv_full = filled_hvp
7231
    # check that we don't specify global parameters on an instance
7232
    _CheckGlobalHvParams(self.op.hvparams)
7233

    
7234
    # fill and remember the beparams dict
7235
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7236
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7237

    
7238
    # build os parameters
7239
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7240

    
7241
    # now that hvp/bep are in final format, let's reset to defaults,
7242
    # if told to do so
7243
    if self.op.identify_defaults:
7244
      self._RevertToDefaults(cluster)
7245

    
7246
    # NIC buildup
7247
    self.nics = []
7248
    for idx, nic in enumerate(self.op.nics):
7249
      nic_mode_req = nic.get("mode", None)
7250
      nic_mode = nic_mode_req
7251
      if nic_mode is None:
7252
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7253

    
7254
      # in routed mode, for the first nic, the default ip is 'auto'
7255
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7256
        default_ip_mode = constants.VALUE_AUTO
7257
      else:
7258
        default_ip_mode = constants.VALUE_NONE
7259

    
7260
      # ip validity checks
7261
      ip = nic.get("ip", default_ip_mode)
7262
      if ip is None or ip.lower() == constants.VALUE_NONE:
7263
        nic_ip = None
7264
      elif ip.lower() == constants.VALUE_AUTO:
7265
        if not self.op.name_check:
7266
          raise errors.OpPrereqError("IP address set to auto but name checks"
7267
                                     " have been skipped",
7268
                                     errors.ECODE_INVAL)
7269
        nic_ip = self.hostname1.ip
7270
      else:
7271
        if not netutils.IPAddress.IsValid(ip):
7272
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7273
                                     errors.ECODE_INVAL)
7274
        nic_ip = ip
7275

    
7276
      # TODO: check the ip address for uniqueness
7277
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7278
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7279
                                   errors.ECODE_INVAL)
7280

    
7281
      # MAC address verification
7282
      mac = nic.get("mac", constants.VALUE_AUTO)
7283
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7284
        mac = utils.NormalizeAndValidateMac(mac)
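        # reserve the user-supplied MAC under this job's execution context so
        # that a concurrent instance creation cannot claim the same address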
7285

    
7286
        try:
7287
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7288
        except errors.ReservationError:
7289
          raise errors.OpPrereqError("MAC address %s already in use"
7290
                                     " in cluster" % mac,
7291
                                     errors.ECODE_NOTUNIQUE)
7292

    
7293
      # bridge verification
7294
      bridge = nic.get("bridge", None)
7295
      link = nic.get("link", None)
7296
      if bridge and link:
7297
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7298
                                   " at the same time", errors.ECODE_INVAL)
7299
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7300
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7301
                                   errors.ECODE_INVAL)
7302
      elif bridge:
7303
        link = bridge
7304

    
7305
      nicparams = {}
7306
      if nic_mode_req:
7307
        nicparams[constants.NIC_MODE] = nic_mode_req
7308
      if link:
7309
        nicparams[constants.NIC_LINK] = link
7310

    
7311
      check_params = cluster.SimpleFillNIC(nicparams)
7312
      objects.NIC.CheckParameterSyntax(check_params)
7313
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7314

    
7315
    # disk checks/pre-build
7316
    self.disks = []
7317
    for disk in self.op.disks:
7318
      mode = disk.get("mode", constants.DISK_RDWR)
7319
      if mode not in constants.DISK_ACCESS_SET:
7320
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7321
                                   mode, errors.ECODE_INVAL)
7322
      size = disk.get("size", None)
7323
      if size is None:
7324
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7325
      try:
7326
        size = int(size)
7327
      except (TypeError, ValueError):
7328
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7329
                                   errors.ECODE_INVAL)
7330
      vg = disk.get("vg", self.cfg.GetVGName())
7331
      new_disk = {"size": size, "mode": mode, "vg": vg}
7332
      if "adopt" in disk:
7333
        new_disk["adopt"] = disk["adopt"]
7334
      self.disks.append(new_disk)
7335

    
7336
    if self.op.mode == constants.INSTANCE_IMPORT:
7337

    
7338
      # Check that the new instance doesn't have less disks than the export
7339
      instance_disks = len(self.disks)
7340
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7341
      if instance_disks < export_disks:
7342
        raise errors.OpPrereqError("Not enough disks to import."
7343
                                   " (instance: %d, export: %d)" %
7344
                                   (instance_disks, export_disks),
7345
                                   errors.ECODE_INVAL)
7346

    
7347
      disk_images = []
7348
      for idx in range(export_disks):
7349
        option = 'disk%d_dump' % idx
7350
        if export_info.has_option(constants.INISECT_INS, option):
7351
          # FIXME: are the old os-es, disk sizes, etc. useful?
7352
          export_name = export_info.get(constants.INISECT_INS, option)
7353
          image = utils.PathJoin(self.op.src_path, export_name)
7354
          disk_images.append(image)
7355
        else:
7356
          disk_images.append(False)
7357

    
7358
      self.src_images = disk_images
7359

    
7360
      old_name = export_info.get(constants.INISECT_INS, 'name')
7361
      try:
7362
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7363
      except (TypeError, ValueError), err:
7364
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7365
                                   " an integer: %s" % str(err),
7366
                                   errors.ECODE_STATE)
7367
      if self.op.instance_name == old_name:
7368
        for idx, nic in enumerate(self.nics):
7369
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7370
            nic_mac_ini = 'nic%d_mac' % idx
7371
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7372

    
7373
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7374

    
7375
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
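    # any response from that address is taken as a sign that the IP is
    # already in use by some other host or instance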
7376
    if self.op.ip_check:
7377
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7378
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7379
                                   (self.check_ip, self.op.instance_name),
7380
                                   errors.ECODE_NOTUNIQUE)
7381

    
7382
    #### mac address generation
7383
    # By generating here the mac address both the allocator and the hooks get
7384
    # the real final mac address rather than the 'auto' or 'generate' value.
7385
    # There is a race condition between the generation and the instance object
7386
    # creation, which means that we know the mac is valid now, but we're not
7387
    # sure it will be when we actually add the instance. If things go bad
7388
    # adding the instance will abort because of a duplicate mac, and the
7389
    # creation job will fail.
7390
    for nic in self.nics:
7391
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7392
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7393

    
7394
    #### allocator run
7395

    
7396
    if self.op.iallocator is not None:
7397
      self._RunAllocator()
7398

    
7399
    #### node related checks
7400

    
7401
    # check primary node
7402
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7403
    assert self.pnode is not None, \
7404
      "Cannot retrieve locked node %s" % self.op.pnode
7405
    if pnode.offline:
7406
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7407
                                 pnode.name, errors.ECODE_STATE)
7408
    if pnode.drained:
7409
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7410
                                 pnode.name, errors.ECODE_STATE)
7411
    if not pnode.vm_capable:
7412
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7413
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7414

    
7415
    self.secondaries = []
7416

    
7417
    # mirror node verification
7418
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7419
      if self.op.snode == pnode.name:
7420
        raise errors.OpPrereqError("The secondary node cannot be the"
7421
                                   " primary node.", errors.ECODE_INVAL)
7422
      _CheckNodeOnline(self, self.op.snode)
7423
      _CheckNodeNotDrained(self, self.op.snode)
7424
      _CheckNodeVmCapable(self, self.op.snode)
7425
      self.secondaries.append(self.op.snode)
7426

    
7427
    nodenames = [pnode.name] + self.secondaries
7428

    
7429
    if not self.adopt_disks:
7430
      # Check lv size requirements, if not adopting
7431
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7432
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7433

    
7434
    else: # instead, we must check the adoption data
7435
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7436
      if len(all_lvs) != len(self.disks):
7437
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7438
                                   errors.ECODE_INVAL)
7439
      for lv_name in all_lvs:
7440
        try:
7441
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7442
          # to ReserveLV uses the same syntax
7443
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7444
        except errors.ReservationError:
7445
          raise errors.OpPrereqError("LV named %s used by another instance" %
7446
                                     lv_name, errors.ECODE_NOTUNIQUE)
7447

    
7448
      vg_names = self.rpc.call_vg_list([pnode.name])
7449
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7450

    
7451
      node_lvs = self.rpc.call_lv_list([pnode.name],
7452
                                       vg_names[pnode.name].payload.keys()
7453
                                      )[pnode.name]
7454
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7455
      node_lvs = node_lvs.payload
7456

    
7457
      delta = all_lvs.difference(node_lvs.keys())
7458
      if delta:
7459
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7460
                                   utils.CommaJoin(delta),
7461
                                   errors.ECODE_INVAL)
7462
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7463
      if online_lvs:
7464
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7465
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7466
                                   errors.ECODE_STATE)
7467
      # update the size of disk based on what is found
7468
      for dsk in self.disks:
7469
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7470

    
7471
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7472

    
7473
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7474
    # check OS parameters (remotely)
7475
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7476

    
7477
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7478

    
7479
    # memory check on primary node
7480
    if self.op.start:
7481
      _CheckNodeFreeMemory(self, self.pnode.name,
7482
                           "creating instance %s" % self.op.instance_name,
7483
                           self.be_full[constants.BE_MEMORY],
7484
                           self.op.hypervisor)
7485

    
7486
    self.dry_run_result = list(nodenames)
7487

    
7488
  def Exec(self, feedback_fn):
7489
    """Create and add the instance to the cluster.
7490

7491
    """
7492
    instance = self.op.instance_name
7493
    pnode_name = self.pnode.name
7494

    
7495
    ht_kind = self.op.hypervisor
7496
    if ht_kind in constants.HTS_REQ_PORT:
7497
      network_port = self.cfg.AllocatePort()
7498
    else:
7499
      network_port = None
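    # the port allocated above (if any) is stored on the instance object and
    # is used e.g. for the VNC console of hypervisors that require one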
7500

    
7501
    if constants.ENABLE_FILE_STORAGE:
7502
      # this is needed because os.path.join does not accept None arguments
7503
      if self.op.file_storage_dir is None:
7504
        string_file_storage_dir = ""
7505
      else:
7506
        string_file_storage_dir = self.op.file_storage_dir
7507

    
7508
      # build the full file storage dir path
7509
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7510
                                        string_file_storage_dir, instance)
7511
    else:
7512
      file_storage_dir = ""
7513

    
7514
    disks = _GenerateDiskTemplate(self,
7515
                                  self.op.disk_template,
7516
                                  instance, pnode_name,
7517
                                  self.secondaries,
7518
                                  self.disks,
7519
                                  file_storage_dir,
7520
                                  self.op.file_driver,
7521
                                  0,
7522
                                  feedback_fn)
7523

    
7524
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7525
                            primary_node=pnode_name,
7526
                            nics=self.nics, disks=disks,
7527
                            disk_template=self.op.disk_template,
7528
                            admin_up=False,
7529
                            network_port=network_port,
7530
                            beparams=self.op.beparams,
7531
                            hvparams=self.op.hvparams,
7532
                            hypervisor=self.op.hypervisor,
7533
                            osparams=self.op.osparams,
7534
                            )
7535

    
7536
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
7549
      feedback_fn("* creating instance disks...")
7550
      try:
7551
        _CreateDisks(self, iobj)
7552
      except errors.OpExecError:
7553
        self.LogWarning("Device creation failed, reverting...")
7554
        try:
7555
          _RemoveDisks(self, iobj)
7556
        finally:
7557
          self.cfg.ReleaseDRBDMinors(instance)
7558
          raise
7559

    
7560
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7561
        feedback_fn("* wiping instance disks...")
7562
        try:
7563
          _WipeDisks(self, iobj)
7564
        except errors.OpExecError:
7565
          self.LogWarning("Device wiping failed, reverting...")
7566
          try:
7567
            _RemoveDisks(self, iobj)
7568
          finally:
7569
            self.cfg.ReleaseDRBDMinors(instance)
7570
            raise
7571

    
7572
    feedback_fn("adding instance %s to cluster config" % instance)
7573

    
7574
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7575

    
7576
    # Declare that we don't want to remove the instance lock anymore, as we've
7577
    # added the instance to the config
7578
    del self.remove_locks[locking.LEVEL_INSTANCE]
7579
    # Unlock all the nodes
7580
    if self.op.mode == constants.INSTANCE_IMPORT:
7581
      nodes_keep = [self.op.src_node]
7582
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7583
                       if node != self.op.src_node]
7584
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7585
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
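      # the source node lock is kept because the disk images still have to be
      # transferred from it later in this function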
7586
    else:
7587
      self.context.glm.release(locking.LEVEL_NODE)
7588
      del self.acquired_locks[locking.LEVEL_NODE]
7589

    
7590
    if self.op.wait_for_sync:
7591
      disk_abort = not _WaitForSync(self, iobj)
7592
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7593
      # make sure the disks are not degraded (still sync-ing is ok)
7594
      time.sleep(15)
7595
      feedback_fn("* checking mirrors status")
7596
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7597
    else:
7598
      disk_abort = False
7599

    
7600
    if disk_abort:
7601
      _RemoveDisks(self, iobj)
7602
      self.cfg.RemoveInstance(iobj.name)
7603
      # Make sure the instance lock gets removed
7604
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7605
      raise errors.OpExecError("There are some degraded disks for"
7606
                               " this instance")
7607

    
7608
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7609
      if self.op.mode == constants.INSTANCE_CREATE:
7610
        if not self.op.no_install:
7611
          feedback_fn("* running the instance OS create scripts...")
7612
          # FIXME: pass debug option from opcode to backend
7613
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7614
                                                 self.op.debug_level)
7615
          result.Raise("Could not add os for instance %s"
7616
                       " on node %s" % (instance, pnode_name))
7617

    
7618
      elif self.op.mode == constants.INSTANCE_IMPORT:
7619
        feedback_fn("* running the instance OS import scripts...")
7620

    
7621
        transfers = []
7622

    
7623
        for idx, image in enumerate(self.src_images):
7624
          if not image:
7625
            continue
7626

    
7627
          # FIXME: pass debug option from opcode to backend
7628
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7629
                                             constants.IEIO_FILE, (image, ),
7630
                                             constants.IEIO_SCRIPT,
7631
                                             (iobj.disks[idx], idx),
7632
                                             None)
7633
          transfers.append(dt)
7634

    
7635
        import_result = \
7636
          masterd.instance.TransferInstanceData(self, feedback_fn,
7637
                                                self.op.src_node, pnode_name,
7638
                                                self.pnode.secondary_ip,
7639
                                                iobj, transfers)
7640
        if not compat.all(import_result):
7641
          self.LogWarning("Some disks for instance %s on node %s were not"
7642
                          " imported successfully" % (instance, pnode_name))
7643

    
7644
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7645
        feedback_fn("* preparing remote import...")
7646
        # The source cluster will stop the instance before attempting to make a
7647
        # connection. In some cases stopping an instance can take a long time,
7648
        # hence the shutdown timeout is added to the connection timeout.
7649
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7650
                           self.op.source_shutdown_timeout)
7651
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7652

    
7653
        assert iobj.primary_node == self.pnode.name
7654
        disk_results = \
7655
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7656
                                        self.source_x509_ca,
7657
                                        self._cds, timeouts)
7658
        if not compat.all(disk_results):
7659
          # TODO: Should the instance still be started, even if some disks
7660
          # failed to import (valid for local imports, too)?
7661
          self.LogWarning("Some disks for instance %s on node %s were not"
7662
                          " imported successfully" % (instance, pnode_name))
7663

    
7664
        # Run rename script on newly imported instance
7665
        assert iobj.name == instance
7666
        feedback_fn("Running rename script for %s" % instance)
7667
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7668
                                                   self.source_instance_name,
7669
                                                   self.op.debug_level)
7670
        if result.fail_msg:
7671
          self.LogWarning("Failed to run rename script for %s on node"
7672
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7673

    
7674
      else:
7675
        # also checked in the prereq part
7676
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7677
                                     % self.op.mode)
7678

    
7679
    if self.op.start:
7680
      iobj.admin_up = True
7681
      self.cfg.Update(iobj, feedback_fn)
7682
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7683
      feedback_fn("* starting instance...")
7684
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7685
      result.Raise("Could not start instance")
7686

    
7687
    return list(iobj.all_nodes)
7688

    
7689

    
7690
class LUConnectConsole(NoHooksLU):
7691
  """Connect to an instance's console.
7692

7693
  This is somewhat special in that it returns the command line that
7694
  you need to run on the master node in order to connect to the
7695
  console.
7696

7697
  """
7698
  REQ_BGL = False
7699

    
7700
  def ExpandNames(self):
7701
    self._ExpandAndLockInstance()
7702

    
7703
  def CheckPrereq(self):
7704
    """Check prerequisites.
7705

7706
    This checks that the instance is in the cluster.
7707

7708
    """
7709
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7710
    assert self.instance is not None, \
7711
      "Cannot retrieve locked instance %s" % self.op.instance_name
7712
    _CheckNodeOnline(self, self.instance.primary_node)
7713

    
7714
  def Exec(self, feedback_fn):
7715
    """Connect to the console of an instance
7716

7717
    """
7718
    instance = self.instance
7719
    node = instance.primary_node
7720

    
7721
    node_insts = self.rpc.call_instance_list([node],
7722
                                             [instance.hypervisor])[node]
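    # the RPC result lists the instances actually running on the primary
    # node; the configuration alone cannot tell whether the instance is up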
7723
    node_insts.Raise("Can't get node information from %s" % node)
7724

    
7725
    if instance.name not in node_insts.payload:
7726
      if instance.admin_up:
7727
        state = "ERROR_down"
7728
      else:
7729
        state = "ADMIN_down"
7730
      raise errors.OpExecError("Instance %s is not running (state %s)" %
7731
                               (instance.name, state))
7732

    
7733
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7734

    
7735
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7736
    cluster = self.cfg.GetClusterInfo()
7737
    # beparams and hvparams are passed separately, to avoid editing the
7738
    # instance and then saving the defaults in the instance itself.
7739
    hvparams = cluster.FillHV(instance)
7740
    beparams = cluster.FillBE(instance)
7741
    console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7742

    
7743
    assert console.instance == instance.name
7744
    assert console.Validate()
7745

    
7746
    return console.ToDict()
7747

    
7748

    
7749
class LUReplaceDisks(LogicalUnit):
7750
  """Replace the disks of an instance.
7751

7752
  """
7753
  HPATH = "mirrors-replace"
7754
  HTYPE = constants.HTYPE_INSTANCE
7755
  REQ_BGL = False
7756

    
7757
  def CheckArguments(self):
7758
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7759
                                  self.op.iallocator)
7760

    
7761
  def ExpandNames(self):
7762
    self._ExpandAndLockInstance()
7763

    
7764
    if self.op.iallocator is not None:
7765
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7766

    
7767
    elif self.op.remote_node is not None:
7768
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7769
      self.op.remote_node = remote_node
7770

    
7771
      # Warning: do not remove the locking of the new secondary here
7772
      # unless DRBD8.AddChildren is changed to work in parallel;
7773
      # currently it doesn't since parallel invocations of
7774
      # FindUnusedMinor will conflict
7775
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7776
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7777

    
7778
    else:
7779
      self.needed_locks[locking.LEVEL_NODE] = []
7780
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7781

    
7782
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7783
                                   self.op.iallocator, self.op.remote_node,
7784
                                   self.op.disks, False, self.op.early_release)
7785

    
7786
    self.tasklets = [self.replacer]
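    # the actual disk replacement work is delegated to the TLReplaceDisks
    # tasklet instantiated above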
7787

    
7788
  def DeclareLocks(self, level):
7789
    # If we're not already locking all nodes in the set we have to declare the
7790
    # instance's primary/secondary nodes.
7791
    if (level == locking.LEVEL_NODE and
7792
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7793
      self._LockInstancesNodes()
7794

    
7795
  def BuildHooksEnv(self):
7796
    """Build hooks env.
7797

7798
    This runs on the master, the primary and all the secondaries.
7799

7800
    """
7801
    instance = self.replacer.instance
7802
    env = {
7803
      "MODE": self.op.mode,
7804
      "NEW_SECONDARY": self.op.remote_node,
7805
      "OLD_SECONDARY": instance.secondary_nodes[0],
7806
      }
7807
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7808
    nl = [
7809
      self.cfg.GetMasterNode(),
7810
      instance.primary_node,
7811
      ]
7812
    if self.op.remote_node is not None:
7813
      nl.append(self.op.remote_node)
7814
    return env, nl, nl
7815

    
7816

    
7817
class TLReplaceDisks(Tasklet):
7818
  """Replaces disks for an instance.
7819

7820
  Note: Locking is not within the scope of this class.
7821

7822
  """
7823
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7824
               disks, delay_iallocator, early_release):
7825
    """Initializes this class.
7826

7827
    """
7828
    Tasklet.__init__(self, lu)
7829

    
7830
    # Parameters
7831
    self.instance_name = instance_name
7832
    self.mode = mode
7833
    self.iallocator_name = iallocator_name
7834
    self.remote_node = remote_node
7835
    self.disks = disks
7836
    self.delay_iallocator = delay_iallocator
7837
    self.early_release = early_release
7838

    
7839
    # Runtime data
7840
    self.instance = None
7841
    self.new_node = None
7842
    self.target_node = None
7843
    self.other_node = None
7844
    self.remote_node_info = None
7845
    self.node_secondary_ip = None
7846

    
7847
  @staticmethod
7848
  def CheckArguments(mode, remote_node, iallocator):
7849
    """Helper function for users of this class.
7850

7851
    """
7852
    # check for valid parameter combination
7853
    if mode == constants.REPLACE_DISK_CHG:
7854
      if remote_node is None and iallocator is None:
7855
        raise errors.OpPrereqError("When changing the secondary either an"
7856
                                   " iallocator script must be used or the"
7857
                                   " new node given", errors.ECODE_INVAL)
7858

    
7859
      if remote_node is not None and iallocator is not None:
7860
        raise errors.OpPrereqError("Give either the iallocator or the new"
7861
                                   " secondary, not both", errors.ECODE_INVAL)
7862

    
7863
    elif remote_node is not None or iallocator is not None:
7864
      # Not replacing the secondary
7865
      raise errors.OpPrereqError("The iallocator and new node options can"
7866
                                 " only be used when changing the"
7867
                                 " secondary node", errors.ECODE_INVAL)
7868

    
7869
  @staticmethod
7870
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7871
    """Compute a new secondary node using an IAllocator.
7872

7873
    """
7874
    ial = IAllocator(lu.cfg, lu.rpc,
7875
                     mode=constants.IALLOCATOR_MODE_RELOC,
7876
                     name=instance_name,
7877
                     relocate_from=relocate_from)
7878

    
7879
    ial.Run(iallocator_name)
7880

    
7881
    if not ial.success:
7882
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7883
                                 " %s" % (iallocator_name, ial.info),
7884
                                 errors.ECODE_NORES)
7885

    
7886
    if len(ial.result) != ial.required_nodes:
7887
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7888
                                 " of nodes (%s), required %s" %
7889
                                 (iallocator_name,
7890
                                  len(ial.result), ial.required_nodes),
7891
                                 errors.ECODE_FAULT)
7892

    
7893
    remote_node_name = ial.result[0]
7894

    
7895
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7896
               instance_name, remote_node_name)
7897

    
7898
    return remote_node_name
7899

    
7900
  def _FindFaultyDisks(self, node_name):
7901
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7902
                                    node_name, True)
7903

    
7904
  def CheckPrereq(self):
7905
    """Check prerequisites.
7906

7907
    This checks that the instance is in the cluster.
7908

7909
    """
7910
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7911
    assert instance is not None, \
7912
      "Cannot retrieve locked instance %s" % self.instance_name
7913

    
7914
    if instance.disk_template != constants.DT_DRBD8:
7915
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7916
                                 " instances", errors.ECODE_INVAL)
7917

    
7918
    if len(instance.secondary_nodes) != 1:
7919
      raise errors.OpPrereqError("The instance has a strange layout,"
7920
                                 " expected one secondary but found %d" %
7921
                                 len(instance.secondary_nodes),
7922
                                 errors.ECODE_FAULT)
7923

    
7924
    if not self.delay_iallocator:
7925
      self._CheckPrereq2()
7926

    
7927
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should conceptually be part of CheckPrereq. It was split
    out and is called from Exec instead because, during node evacuation,
    the iallocator would otherwise only see an unmodified cluster model and
    not take the planned changes into account.

    """
    instance = self.instance
7937
    secondary_node = instance.secondary_nodes[0]
7938

    
7939
    if self.iallocator_name is None:
7940
      remote_node = self.remote_node
7941
    else:
7942
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7943
                                       instance.name, instance.secondary_nodes)
7944

    
7945
    if remote_node is not None:
7946
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7947
      assert self.remote_node_info is not None, \
7948
        "Cannot retrieve locked node %s" % remote_node
7949
    else:
7950
      self.remote_node_info = None
7951

    
7952
    if remote_node == self.instance.primary_node:
7953
      raise errors.OpPrereqError("The specified node is the primary node of"
7954
                                 " the instance.", errors.ECODE_INVAL)
7955

    
7956
    if remote_node == secondary_node:
7957
      raise errors.OpPrereqError("The specified node is already the"
7958
                                 " secondary node of the instance.",
7959
                                 errors.ECODE_INVAL)
7960

    
7961
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7962
                                    constants.REPLACE_DISK_CHG):
7963
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7964
                                 errors.ECODE_INVAL)
7965

    
7966
    if self.mode == constants.REPLACE_DISK_AUTO:
7967
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7968
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7969

    
7970
      if faulty_primary and faulty_secondary:
7971
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7972
                                   " one node and can not be repaired"
7973
                                   " automatically" % self.instance_name,
7974
                                   errors.ECODE_STATE)
7975

    
7976
      if faulty_primary:
7977
        self.disks = faulty_primary
7978
        self.target_node = instance.primary_node
7979
        self.other_node = secondary_node
7980
        check_nodes = [self.target_node, self.other_node]
7981
      elif faulty_secondary:
7982
        self.disks = faulty_secondary
7983
        self.target_node = secondary_node
7984
        self.other_node = instance.primary_node
7985
        check_nodes = [self.target_node, self.other_node]
7986
      else:
7987
        self.disks = []
7988
        check_nodes = []
7989

    
7990
    else:
7991
      # Non-automatic modes
7992
      if self.mode == constants.REPLACE_DISK_PRI:
7993
        self.target_node = instance.primary_node
7994
        self.other_node = secondary_node
7995
        check_nodes = [self.target_node, self.other_node]
7996

    
7997
      elif self.mode == constants.REPLACE_DISK_SEC:
7998
        self.target_node = secondary_node
7999
        self.other_node = instance.primary_node
8000
        check_nodes = [self.target_node, self.other_node]
8001

    
8002
      elif self.mode == constants.REPLACE_DISK_CHG:
8003
        self.new_node = remote_node
8004
        self.other_node = instance.primary_node
8005
        self.target_node = secondary_node
8006
        check_nodes = [self.new_node, self.other_node]
8007

    
8008
        _CheckNodeNotDrained(self.lu, remote_node)
8009
        _CheckNodeVmCapable(self.lu, remote_node)
8010

    
8011
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8012
        assert old_node_info is not None
8013
        if old_node_info.offline and not self.early_release:
8014
          # doesn't make sense to delay the release
8015
          self.early_release = True
8016
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8017
                          " early-release mode", secondary_node)
8018

    
8019
      else:
8020
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8021
                                     self.mode)
8022

    
8023
      # If not specified all disks should be replaced
8024
      if not self.disks:
8025
        self.disks = range(len(self.instance.disks))
8026

    
8027
    for node in check_nodes:
8028
      _CheckNodeOnline(self.lu, node)
8029

    
8030
    # Check whether disks are valid
8031
    for disk_idx in self.disks:
8032
      instance.FindDisk(disk_idx)
8033

    
8034
    # Get secondary node IP addresses
8035
    node_2nd_ip = {}
8036

    
8037
    for node_name in [self.target_node, self.other_node, self.new_node]:
8038
      if node_name is not None:
8039
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8040

    
8041
    self.node_secondary_ip = node_2nd_ip
8042

    
8043
  def Exec(self, feedback_fn):
8044
    """Execute disk replacement.
8045

8046
    This dispatches the disk replacement to the appropriate handler.
8047

8048
    """
8049
    if self.delay_iallocator:
8050
      self._CheckPrereq2()
8051

    
8052
    if not self.disks:
8053
      feedback_fn("No disks need replacement")
8054
      return
8055

    
8056
    feedback_fn("Replacing disk(s) %s for %s" %
8057
                (utils.CommaJoin(self.disks), self.instance.name))
8058

    
8059
    activate_disks = (not self.instance.admin_up)
8060

    
8061
    # Activate the instance disks if we're replacing them on a down instance
8062
    if activate_disks:
8063
      _StartInstanceDisks(self.lu, self.instance, True)
8064

    
8065
    try:
8066
      # Should we replace the secondary node?
8067
      if self.new_node is not None:
8068
        fn = self._ExecDrbd8Secondary
8069
      else:
8070
        fn = self._ExecDrbd8DiskOnly
8071

    
8072
      return fn(feedback_fn)
8073

    
8074
    finally:
8075
      # Deactivate the instance disks if we're replacing them on a
8076
      # down instance
8077
      if activate_disks:
8078
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8079

    
8080
  def _CheckVolumeGroup(self, nodes):
8081
    self.lu.LogInfo("Checking volume groups")
8082

    
8083
    vgname = self.cfg.GetVGName()
8084

    
8085
    # Make sure volume group exists on all involved nodes
8086
    results = self.rpc.call_vg_list(nodes)
8087
    if not results:
8088
      raise errors.OpExecError("Can't list volume groups on the nodes")
8089

    
8090
    for node in nodes:
8091
      res = results[node]
8092
      res.Raise("Error checking node %s" % node)
8093
      if vgname not in res.payload:
8094
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8095
                                 (vgname, node))
8096

    
8097
  def _CheckDisksExistence(self, nodes):
8098
    # Check disk existence
8099
    for idx, dev in enumerate(self.instance.disks):
8100
      if idx not in self.disks:
8101
        continue
8102

    
8103
      for node in nodes:
8104
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8105
        self.cfg.SetDiskID(dev, node)
8106

    
8107
        result = self.rpc.call_blockdev_find(node, dev)
8108

    
8109
        msg = result.fail_msg
8110
        if msg or not result.payload:
8111
          if not msg:
8112
            msg = "disk not found"
8113
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8114
                                   (idx, node, msg))
8115

    
8116
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8117
    for idx, dev in enumerate(self.instance.disks):
8118
      if idx not in self.disks:
8119
        continue
8120

    
8121
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8122
                      (idx, node_name))
8123

    
8124
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8125
                                   ldisk=ldisk):
8126
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8127
                                 " replace disks for instance %s" %
8128
                                 (node_name, self.instance.name))
8129

    
8130
  def _CreateNewStorage(self, node_name):
8131
    vgname = self.cfg.GetVGName()
8132
    iv_names = {}
8133

    
8134
    for idx, dev in enumerate(self.instance.disks):
8135
      if idx not in self.disks:
8136
        continue
8137

    
8138
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8139

    
8140
      self.cfg.SetDiskID(dev, node_name)
8141

    
8142
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8143
      names = _GenerateUniqueNames(self.lu, lv_names)
8144

    
8145
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8146
                             logical_id=(vgname, names[0]))
8147
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8148
                             logical_id=(vgname, names[1]))
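      # lv_data mirrors the disk contents and matches its size; lv_meta is a
      # small volume holding the DRBD metadata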
8149

    
8150
      new_lvs = [lv_data, lv_meta]
8151
      old_lvs = dev.children
8152
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8153

    
8154
      # we pass force_create=True to force the LVM creation
8155
      for new_lv in new_lvs:
8156
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8157
                        _GetInstanceInfoText(self.instance), False)
8158

    
8159
    return iv_names
8160

    
8161
  def _CheckDevices(self, node_name, iv_names):
8162
    for name, (dev, _, _) in iv_names.iteritems():
8163
      self.cfg.SetDiskID(dev, node_name)
8164

    
8165
      result = self.rpc.call_blockdev_find(node_name, dev)
8166

    
8167
      msg = result.fail_msg
8168
      if msg or not result.payload:
8169
        if not msg:
8170
          msg = "disk not found"
8171
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8172
                                 (name, msg))
8173

    
8174
      if result.payload.is_degraded:
8175
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8176

    
8177
  def _RemoveOldStorage(self, node_name, iv_names):
8178
    for name, (_, old_lvs, _) in iv_names.iteritems():
8179
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8180

    
8181
      for lv in old_lvs:
8182
        self.cfg.SetDiskID(lv, node_name)
8183

    
8184
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8185
        if msg:
8186
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8187
                             hint="remove unused LVs manually")
8188

    
8189
  def _ReleaseNodeLock(self, node_name):
8190
    """Releases the lock for a given node."""
8191
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8192

    
8193
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
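    # the six steps are reported through LogStep below; with early_release the
    # old storage is removed before the resync wait, otherwise only afterwards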
8216

    
8217
    # Step: check device activation
8218
    self.lu.LogStep(1, steps_total, "Check device existence")
8219
    self._CheckDisksExistence([self.other_node, self.target_node])
8220
    self._CheckVolumeGroup([self.target_node, self.other_node])
8221

    
8222
    # Step: check other node consistency
8223
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8224
    self._CheckDisksConsistency(self.other_node,
8225
                                self.other_node == self.instance.primary_node,
8226
                                False)
8227

    
8228
    # Step: create new storage
8229
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8230
    iv_names = self._CreateNewStorage(self.target_node)
8231

    
8232
    # Step: for each lv, detach+rename*2+attach
8233
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8234
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8235
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8236

    
8237
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8238
                                                     old_lvs)
8239
      result.Raise("Can't detach drbd from local storage on node"
8240
                   " %s for device %s" % (self.target_node, dev.iv_name))
8241
      #dev.children = []
8242
      #cfg.Update(instance)
8243

    
8244
      # ok, we created the new LVs, so now we know we have the needed
8245
      # storage; as such, we proceed on the target node to rename
8246
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8247
      # using the assumption that logical_id == physical_id (which in
8248
      # turn is the unique_id on that node)
8249

    
8250
      # FIXME(iustin): use a better name for the replaced LVs
8251
      temp_suffix = int(time.time())
8252
      ren_fn = lambda d, suff: (d.physical_id[0],
8253
                                d.physical_id[1] + "_replaced-%s" % suff)
8254

    
8255
      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

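    # AllocateDRBDMinor above is given one node name per instance disk; every
    # entry is new_node because all of the replacement minors will live on
    # the new secondary.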
    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
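      # An explicit replacement node was given: pair every secondary instance
      # of the requested nodes with that node, without asking the iallocator.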
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature
      # will be supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               {self.disk.physical_id[0]: self.op.amount})

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
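    # For DRBD disks, logical_id starts with the two node names (see how it
    # is unpacked in _ExecDrbd8Secondary above), so the secondary is whichever
    # of the two is not the instance's primary node.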
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_NET_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{"size": d.size, "vg": d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode,  msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
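        # The shortfall is the requested memory minus what the instance
        # already uses minus the node's reported free memory; a positive
        # value means the instance could no longer be started on its
        # primary node with the new beparams.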
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
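    # Each new DRBD8 disk built by _GenerateDiskTemplate has two children:
    # children[0] is the data LV (it takes over the existing plain LV via the
    # rename below) and children[1] is the freshly created metadata LV.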
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

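    # children[0] of each DRBD8 disk is its data LV, which becomes the plain
    # disk; the metadata LV (children[1]) is removed further down.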
    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

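  # Maps (current disk template, requested disk template) to the bound method
  # that performs the conversion; Exec() looks the pair up here after shutting
  # down the instance's disks.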
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUQueryExports(NoHooksLU):
9499
  """Query the exports list
9500

9501
  """
9502
  REQ_BGL = False
9503

    
9504
  def ExpandNames(self):
9505
    self.needed_locks = {}
9506
    self.share_locks[locking.LEVEL_NODE] = 1
9507
    if not self.op.nodes:
9508
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9509
    else:
9510
      self.needed_locks[locking.LEVEL_NODE] = \
9511
        _GetWantedNodes(self, self.op.nodes)
9512

    
9513
  def Exec(self, feedback_fn):
9514
    """Compute the list of all the exported system images.
9515

9516
    @rtype: dict
9517
    @return: a dictionary with the structure node->(export-list)
9518
        where export-list is a list of the instances exported on
9519
        that node.
9520

9521
    """
9522
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9523
    rpcresult = self.rpc.call_export_list(self.nodes)
9524
    result = {}
9525
    for node in rpcresult:
9526
      if rpcresult[node].fail_msg:
9527
        result[node] = False
9528
      else:
9529
        result[node] = rpcresult[node].payload
9530

    
9531
    return result
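
# A minimal sketch (hypothetical node and instance names) of the structure
# returned by LUQueryExports.Exec above: each queried node maps either to its
# export list or to False when the RPC to that node failed, e.g.
#
#   {
#     "node1.example.com": ["inst1.example.com", "inst2.example.com"],
#     "node2.example.com": False,  # export list could not be fetched
#   }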
9532

    
9533

    
9534
class LUPrepareExport(NoHooksLU):
9535
  """Prepares an instance for an export and returns useful information.
9536

9537
  """
9538
  REQ_BGL = False
9539

    
9540
  def ExpandNames(self):
9541
    self._ExpandAndLockInstance()
9542

    
9543
  def CheckPrereq(self):
9544
    """Check prerequisites.
9545

9546
    """
9547
    instance_name = self.op.instance_name
9548

    
9549
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9550
    assert self.instance is not None, \
9551
          "Cannot retrieve locked instance %s" % self.op.instance_name
9552
    _CheckNodeOnline(self, self.instance.primary_node)
9553

    
9554
    self._cds = _GetClusterDomainSecret()
9555

    
9556
  def Exec(self, feedback_fn):
9557
    """Prepares an instance for an export.
9558

9559
    """
9560
    instance = self.instance
9561

    
9562
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9563
      salt = utils.GenerateSecret(8)
9564

    
9565
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9566
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9567
                                              constants.RIE_CERT_VALIDITY)
9568
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9569

    
9570
      (name, cert_pem) = result.payload
9571

    
9572
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9573
                                             cert_pem)
9574

    
9575
      return {
9576
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9577
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9578
                          salt),
9579
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9580
        }
9581

    
9582
    return None
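
# A minimal sketch of what LUPrepareExport.Exec returns for remote exports
# (local exports get None); "cds" stands for the cluster domain secret, while
# "name" and "cert" come from the x509_cert_create RPC call above:
#
#   {
#     "handshake": masterd.instance.ComputeRemoteExportHandshake(cds),
#     "x509_key_name": (name, utils.Sha1Hmac(cds, name, salt=salt), salt),
#     "x509_ca": utils.SignX509Certificate(cert, cds, salt),
#   }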
9583

    
9584

    
9585
class LUExportInstance(LogicalUnit):
9586
  """Export an instance to an image in the cluster.
9587

9588
  """
9589
  HPATH = "instance-export"
9590
  HTYPE = constants.HTYPE_INSTANCE
9591
  REQ_BGL = False
9592

    
9593
  def CheckArguments(self):
9594
    """Check the arguments.
9595

9596
    """
9597
    self.x509_key_name = self.op.x509_key_name
9598
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9599

    
9600
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9601
      if not self.x509_key_name:
9602
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9603
                                   errors.ECODE_INVAL)
9604

    
9605
      if not self.dest_x509_ca_pem:
9606
        raise errors.OpPrereqError("Missing destination X509 CA",
9607
                                   errors.ECODE_INVAL)
9608

    
9609
  def ExpandNames(self):
9610
    self._ExpandAndLockInstance()
9611

    
9612
    # Lock all nodes for local exports
9613
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9614
      # FIXME: lock only instance primary and destination node
9615
      #
9616
      # Sad but true, for now we have to lock all nodes, as we don't know where
9617
      # the previous export might be, and in this LU we search for it and
9618
      # remove it from its current node. In the future we could fix this by:
9619
      #  - making a tasklet to search (share-lock all), then create the
9620
      #    new one, then one to remove, after
9621
      #  - removing the removal operation altogether
9622
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9623

    
9624
  def DeclareLocks(self, level):
9625
    """Last minute lock declaration."""
9626
    # All nodes are locked anyway, so nothing to do here.
9627

    
9628
  def BuildHooksEnv(self):
9629
    """Build hooks env.
9630

9631
    This will run on the master, primary node and target node.
9632

9633
    """
9634
    env = {
9635
      "EXPORT_MODE": self.op.mode,
9636
      "EXPORT_NODE": self.op.target_node,
9637
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9638
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9639
      # TODO: Generic function for boolean env variables
9640
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9641
      }
9642

    
9643
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9644

    
9645
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9646

    
9647
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9648
      nl.append(self.op.target_node)
9649

    
9650
    return env, nl, nl
9651

    
9652
  def CheckPrereq(self):
9653
    """Check prerequisites.
9654

9655
    This checks that the instance and node names are valid.
9656

9657
    """
9658
    instance_name = self.op.instance_name
9659

    
9660
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9661
    assert self.instance is not None, \
9662
          "Cannot retrieve locked instance %s" % self.op.instance_name
9663
    _CheckNodeOnline(self, self.instance.primary_node)
9664

    
9665
    if (self.op.remove_instance and self.instance.admin_up and
9666
        not self.op.shutdown):
9667
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9668
                                 " down before")
9669

    
9670
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9671
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9672
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9673
      assert self.dst_node is not None
9674

    
9675
      _CheckNodeOnline(self, self.dst_node.name)
9676
      _CheckNodeNotDrained(self, self.dst_node.name)
9677

    
9678
      self._cds = None
9679
      self.dest_disk_info = None
9680
      self.dest_x509_ca = None
9681

    
9682
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9683
      self.dst_node = None
9684

    
9685
      if len(self.op.target_node) != len(self.instance.disks):
9686
        raise errors.OpPrereqError(("Received destination information for %s"
9687
                                    " disks, but instance %s has %s disks") %
9688
                                   (len(self.op.target_node), instance_name,
9689
                                    len(self.instance.disks)),
9690
                                   errors.ECODE_INVAL)
9691

    
9692
      cds = _GetClusterDomainSecret()
9693

    
9694
      # Check X509 key name
9695
      try:
9696
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9697
      except (TypeError, ValueError), err:
9698
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9699

    
9700
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9701
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9702
                                   errors.ECODE_INVAL)
9703

    
9704
      # Load and verify CA
9705
      try:
9706
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9707
      except OpenSSL.crypto.Error, err:
9708
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9709
                                   (err, ), errors.ECODE_INVAL)
9710

    
9711
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9712
      if errcode is not None:
9713
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9714
                                   (msg, ), errors.ECODE_INVAL)
9715

    
9716
      self.dest_x509_ca = cert
9717

    
9718
      # Verify target information
9719
      disk_info = []
9720
      for idx, disk_data in enumerate(self.op.target_node):
9721
        try:
9722
          (host, port, magic) = \
9723
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9724
        except errors.GenericError, err:
9725
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9726
                                     (idx, err), errors.ECODE_INVAL)
9727

    
9728
        disk_info.append((host, port, magic))
9729

    
9730
      assert len(disk_info) == len(self.op.target_node)
9731
      self.dest_disk_info = disk_info
9732

    
9733
    else:
9734
      raise errors.ProgrammerError("Unhandled export mode %r" %
9735
                                   self.op.mode)
9736

    
9737
    # instance disk type verification
9738
    # TODO: Implement export support for file-based disks
9739
    for disk in self.instance.disks:
9740
      if disk.dev_type == constants.LD_FILE:
9741
        raise errors.OpPrereqError("Export not supported for instances with"
9742
                                   " file-based disks", errors.ECODE_INVAL)
9743

    
9744
  def _CleanupExports(self, feedback_fn):
9745
    """Removes exports of current instance from all other nodes.
9746

9747
    If an instance in a cluster with nodes A..D was exported to node C, its
9748
    exports will be removed from the nodes A, B and D.
9749

9750
    """
9751
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9752

    
9753
    nodelist = self.cfg.GetNodeList()
9754
    nodelist.remove(self.dst_node.name)
9755

    
9756
    # on one-node clusters nodelist will be empty after the removal
9757
    # if we proceed the backup would be removed because OpQueryExports
9758
    # substitutes an empty list with the full cluster node list.
9759
    iname = self.instance.name
9760
    if nodelist:
9761
      feedback_fn("Removing old exports for instance %s" % iname)
9762
      exportlist = self.rpc.call_export_list(nodelist)
9763
      for node in exportlist:
9764
        if exportlist[node].fail_msg:
9765
          continue
9766
        if iname in exportlist[node].payload:
9767
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9768
          if msg:
9769
            self.LogWarning("Could not remove older export for instance %s"
9770
                            " on node %s: %s", iname, node, msg)
9771

    
9772
  def Exec(self, feedback_fn):
9773
    """Export an instance to an image in the cluster.
9774

9775
    """
9776
    assert self.op.mode in constants.EXPORT_MODES
9777

    
9778
    instance = self.instance
9779
    src_node = instance.primary_node
9780

    
9781
    if self.op.shutdown:
9782
      # shutdown the instance, but not the disks
9783
      feedback_fn("Shutting down instance %s" % instance.name)
9784
      result = self.rpc.call_instance_shutdown(src_node, instance,
9785
                                               self.op.shutdown_timeout)
9786
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9787
      result.Raise("Could not shutdown instance %s on"
9788
                   " node %s" % (instance.name, src_node))
9789

    
9790
    # set the disk IDs correctly since call_instance_start needs the
9791
    # correct drbd minor to create the symlinks
9792
    for disk in instance.disks:
9793
      self.cfg.SetDiskID(disk, src_node)
9794

    
9795
    activate_disks = (not instance.admin_up)
9796

    
9797
    if activate_disks:
9798
      # Activate the instance disks if we're exporting a stopped instance
9799
      feedback_fn("Activating disks for %s" % instance.name)
9800
      _StartInstanceDisks(self, instance, None)
9801

    
9802
    try:
9803
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9804
                                                     instance)
9805

    
9806
      helper.CreateSnapshots()
9807
      try:
9808
        if (self.op.shutdown and instance.admin_up and
9809
            not self.op.remove_instance):
9810
          assert not activate_disks
9811
          feedback_fn("Starting instance %s" % instance.name)
9812
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9813
          msg = result.fail_msg
9814
          if msg:
9815
            feedback_fn("Failed to start instance: %s" % msg)
9816
            _ShutdownInstanceDisks(self, instance)
9817
            raise errors.OpExecError("Could not start instance: %s" % msg)
9818

    
9819
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9820
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9821
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9822
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9823
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9824

    
9825
          (key_name, _, _) = self.x509_key_name
9826

    
9827
          dest_ca_pem = \
9828
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9829
                                            self.dest_x509_ca)
9830

    
9831
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9832
                                                     key_name, dest_ca_pem,
9833
                                                     timeouts)
9834
      finally:
9835
        helper.Cleanup()
9836

    
9837
      # Check for backwards compatibility
9838
      assert len(dresults) == len(instance.disks)
9839
      assert compat.all(isinstance(i, bool) for i in dresults), \
9840
             "Not all results are boolean: %r" % dresults
9841

    
9842
    finally:
9843
      if activate_disks:
9844
        feedback_fn("Deactivating disks for %s" % instance.name)
9845
        _ShutdownInstanceDisks(self, instance)
9846

    
9847
    if not (compat.all(dresults) and fin_resu):
9848
      failures = []
9849
      if not fin_resu:
9850
        failures.append("export finalization")
9851
      if not compat.all(dresults):
9852
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9853
                               if not dsk)
9854
        failures.append("disk export: disk(s) %s" % fdsk)
9855

    
9856
      raise errors.OpExecError("Export failed, errors in %s" %
9857
                               utils.CommaJoin(failures))
9858

    
9859
    # At this point, the export was successful; we can clean up and finish
9860

    
9861
    # Remove instance if requested
9862
    if self.op.remove_instance:
9863
      feedback_fn("Removing instance %s" % instance.name)
9864
      _RemoveInstance(self, feedback_fn, instance,
9865
                      self.op.ignore_remove_failures)
9866

    
9867
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9868
      self._CleanupExports(feedback_fn)
9869

    
9870
    return fin_resu, dresults
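
# A minimal sketch of the value returned by LUExportInstance.Exec above: a
# pair (finalization_ok, per_disk_results), e.g. (True, [True, True]) for a
# successful export of a two-disk instance, letting callers report failures
# of individual disks.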
9871

    
9872

    
9873
class LURemoveExport(NoHooksLU):
9874
  """Remove exports related to the named instance.
9875

9876
  """
9877
  REQ_BGL = False
9878

    
9879
  def ExpandNames(self):
9880
    self.needed_locks = {}
9881
    # We need all nodes to be locked in order for RemoveExport to work, but we
9882
    # don't need to lock the instance itself, as nothing will happen to it (and
9883
    # we can remove exports also for a removed instance)
9884
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9885

    
9886
  def Exec(self, feedback_fn):
9887
    """Remove any export.
9888

9889
    """
9890
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9891
    # If the instance was not found we'll try with the name that was passed in.
9892
    # This will only work if it was an FQDN, though.
9893
    fqdn_warn = False
9894
    if not instance_name:
9895
      fqdn_warn = True
9896
      instance_name = self.op.instance_name
9897

    
9898
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9899
    exportlist = self.rpc.call_export_list(locked_nodes)
9900
    found = False
9901
    for node in exportlist:
9902
      msg = exportlist[node].fail_msg
9903
      if msg:
9904
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9905
        continue
9906
      if instance_name in exportlist[node].payload:
9907
        found = True
9908
        result = self.rpc.call_export_remove(node, instance_name)
9909
        msg = result.fail_msg
9910
        if msg:
9911
          logging.error("Could not remove export for instance %s"
9912
                        " on node %s: %s", instance_name, node, msg)
9913

    
9914
    if fqdn_warn and not found:
9915
      feedback_fn("Export not found. If trying to remove an export belonging"
9916
                  " to a deleted instance please use its Fully Qualified"
9917
                  " Domain Name.")
9918

    
9919

    
9920
class LUAddGroup(LogicalUnit):
9921
  """Logical unit for creating node groups.
9922

9923
  """
9924
  HPATH = "group-add"
9925
  HTYPE = constants.HTYPE_GROUP
9926
  REQ_BGL = False
9927

    
9928
  def ExpandNames(self):
9929
    # We need the new group's UUID here so that we can create and acquire the
9930
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
9931
    # that it should not check whether the UUID exists in the configuration.
9932
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
9933
    self.needed_locks = {}
9934
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
9935

    
9936
  def CheckPrereq(self):
9937
    """Check prerequisites.
9938

9939
    This checks that the given group name does not already exist as a node
9940
    group.
9941

9942
    """
9943
    try:
9944
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
9945
    except errors.OpPrereqError:
9946
      pass
9947
    else:
9948
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
9949
                                 " node group (UUID: %s)" %
9950
                                 (self.op.group_name, existing_uuid),
9951
                                 errors.ECODE_EXISTS)
9952

    
9953
    if self.op.ndparams:
9954
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
9955

    
9956
  def BuildHooksEnv(self):
9957
    """Build hooks env.
9958

9959
    """
9960
    env = {
9961
      "GROUP_NAME": self.op.group_name,
9962
      }
9963
    mn = self.cfg.GetMasterNode()
9964
    return env, [mn], [mn]
9965

    
9966
  def Exec(self, feedback_fn):
9967
    """Add the node group to the cluster.
9968

9969
    """
9970
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
9971
                                  uuid=self.group_uuid,
9972
                                  alloc_policy=self.op.alloc_policy,
9973
                                  ndparams=self.op.ndparams)
9974

    
9975
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
9976
    del self.remove_locks[locking.LEVEL_NODEGROUP]
9977

    
9978

    
9979
class LUAssignGroupNodes(NoHooksLU):
9980
  """Logical unit for assigning nodes to groups.
9981

9982
  """
9983
  REQ_BGL = False
9984

    
9985
  def ExpandNames(self):
9986
    # These raise errors.OpPrereqError on their own:
9987
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
9988
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9989

    
9990
    # We want to lock all the affected nodes and groups. We have readily
9991
    # available the list of nodes, and the *destination* group. To gather the
9992
    # list of "source" groups, we need to fetch node information.
9993
    self.node_data = self.cfg.GetAllNodesInfo()
9994
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
9995
    affected_groups.add(self.group_uuid)
9996

    
9997
    self.needed_locks = {
9998
      locking.LEVEL_NODEGROUP: list(affected_groups),
9999
      locking.LEVEL_NODE: self.op.nodes,
10000
      }
10001

    
10002
  def CheckPrereq(self):
10003
    """Check prerequisites.
10004

10005
    """
10006
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10007
    instance_data = self.cfg.GetAllInstancesInfo()
10008

    
10009
    if self.group is None:
10010
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10011
                               (self.op.group_name, self.group_uuid))
10012

    
10013
    (new_splits, previous_splits) = \
10014
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10015
                                             for node in self.op.nodes],
10016
                                            self.node_data, instance_data)
10017

    
10018
    if new_splits:
10019
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10020

    
10021
      if not self.op.force:
10022
        raise errors.OpExecError("The following instances get split by this"
10023
                                 " change and --force was not given: %s" %
10024
                                 fmt_new_splits)
10025
      else:
10026
        self.LogWarning("This operation will split the following instances: %s",
10027
                        fmt_new_splits)
10028

    
10029
        if previous_splits:
10030
          self.LogWarning("In addition, these already-split instances continue"
10031
                          " to be spit across groups: %s",
10032
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
10033

    
10034
  def Exec(self, feedback_fn):
10035
    """Assign nodes to a new group.
10036

10037
    """
10038
    for node in self.op.nodes:
10039
      self.node_data[node].group = self.group_uuid
10040

    
10041
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10042

    
10043
  @staticmethod
10044
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10045
    """Check for split instances after a node assignment.
10046

10047
    This method considers a series of node assignments as an atomic operation,
10048
    and returns information about split instances after applying the set of
10049
    changes.
10050

10051
    In particular, it returns information about newly split instances, and
10052
    instances that were already split, and remain so after the change.
10053

10054
    Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10055
    considered.
10056

10057
    @type changes: list of (node_name, new_group_uuid) pairs.
10058
    @param changes: list of node assignments to consider.
10059
    @param node_data: a dict with data for all nodes
10060
    @param instance_data: a dict with all instances to consider
10061
    @rtype: a two-tuple
10062
    @return: a list of instances that were previously okay and become split as a
10063
      consequence of this change, and a list of instances that were previously
10064
      split and this change does not fix.
10065

10066
    """
10067
    changed_nodes = dict((node, group) for node, group in changes
10068
                         if node_data[node].group != group)
10069

    
10070
    all_split_instances = set()
10071
    previously_split_instances = set()
10072

    
10073
    def InstanceNodes(instance):
10074
      return [instance.primary_node] + list(instance.secondary_nodes)
10075

    
10076
    for inst in instance_data.values():
10077
      if inst.disk_template not in constants.DTS_NET_MIRROR:
10078
        continue
10079

    
10080
      instance_nodes = InstanceNodes(inst)
10081

    
10082
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
10083
        previously_split_instances.add(inst.name)
10084

    
10085
      if len(set(changed_nodes.get(node, node_data[node].group)
10086
                 for node in instance_nodes)) > 1:
10087
        all_split_instances.add(inst.name)
10088

    
10089
    return (list(all_split_instances - previously_split_instances),
10090
            list(previously_split_instances & all_split_instances))
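
# A minimal sketch (hypothetical names) of CheckAssignmentForSplitInstances:
# for a DRBD instance "inst1" with primary "node1" and secondary "node2", both
# currently in group "g1", the change list [("node1", "g2")] makes the
# instance newly split and yields (["inst1"], []), while moving both nodes
# with [("node1", "g2"), ("node2", "g2")] keeps it together and yields
# ([], []).  Instances that already spanned groups before the change and
# still do afterwards end up in the second list instead.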
10091

    
10092

    
10093
class _GroupQuery(_QueryBase):
10094

    
10095
  FIELDS = query.GROUP_FIELDS
10096

    
10097
  def ExpandNames(self, lu):
10098
    lu.needed_locks = {}
10099

    
10100
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10101
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10102

    
10103
    if not self.names:
10104
      self.wanted = [name_to_uuid[name]
10105
                     for name in utils.NiceSort(name_to_uuid.keys())]
10106
    else:
10107
      # Accept names to be either names or UUIDs.
10108
      missing = []
10109
      self.wanted = []
10110
      all_uuid = frozenset(self._all_groups.keys())
10111

    
10112
      for name in self.names:
10113
        if name in all_uuid:
10114
          self.wanted.append(name)
10115
        elif name in name_to_uuid:
10116
          self.wanted.append(name_to_uuid[name])
10117
        else:
10118
          missing.append(name)
10119

    
10120
      if missing:
10121
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10122
                                   errors.ECODE_NOENT)
10123

    
10124
  def DeclareLocks(self, lu, level):
10125
    pass
10126

    
10127
  def _GetQueryData(self, lu):
10128
    """Computes the list of node groups and their attributes.
10129

10130
    """
10131
    do_nodes = query.GQ_NODE in self.requested_data
10132
    do_instances = query.GQ_INST in self.requested_data
10133

    
10134
    group_to_nodes = None
10135
    group_to_instances = None
10136

    
10137
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10138
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10139
    # latter GetAllInstancesInfo() is not enough, for we have to go through
10140
    # instance->node. Hence, we will need to process nodes even if we only need
10141
    # instance information.
10142
    if do_nodes or do_instances:
10143
      all_nodes = lu.cfg.GetAllNodesInfo()
10144
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10145
      node_to_group = {}
10146

    
10147
      for node in all_nodes.values():
10148
        if node.group in group_to_nodes:
10149
          group_to_nodes[node.group].append(node.name)
10150
          node_to_group[node.name] = node.group
10151

    
10152
      if do_instances:
10153
        all_instances = lu.cfg.GetAllInstancesInfo()
10154
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
10155

    
10156
        for instance in all_instances.values():
10157
          node = instance.primary_node
10158
          if node in node_to_group:
10159
            group_to_instances[node_to_group[node]].append(instance.name)
10160

    
10161
        if not do_nodes:
10162
          # Do not pass on node information if it was not requested.
10163
          group_to_nodes = None
10164

    
10165
    return query.GroupQueryData([self._all_groups[uuid]
10166
                                 for uuid in self.wanted],
10167
                                group_to_nodes, group_to_instances)
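
# A minimal sketch (hypothetical names) of the mappings built above when both
# GQ_NODE and GQ_INST are requested:
#
#   group_to_nodes = {"uuid-g1": ["node1", "node2"], "uuid-g2": ["node3"]}
#   group_to_instances = {"uuid-g1": ["inst1"], "uuid-g2": []}
#
# Instances are attributed to the group of their primary node.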
10168

    
10169

    
10170
class LUQueryGroups(NoHooksLU):
10171
  """Logical unit for querying node groups.
10172

10173
  """
10174
  REQ_BGL = False
10175

    
10176
  def CheckArguments(self):
10177
    self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10178

    
10179
  def ExpandNames(self):
10180
    self.gq.ExpandNames(self)
10181

    
10182
  def Exec(self, feedback_fn):
10183
    return self.gq.OldStyleQuery(self)
10184

    
10185

    
10186
class LUSetGroupParams(LogicalUnit):
10187
  """Modifies the parameters of a node group.
10188

10189
  """
10190
  HPATH = "group-modify"
10191
  HTYPE = constants.HTYPE_GROUP
10192
  REQ_BGL = False
10193

    
10194
  def CheckArguments(self):
10195
    all_changes = [
10196
      self.op.ndparams,
10197
      self.op.alloc_policy,
10198
      ]
10199

    
10200
    if all_changes.count(None) == len(all_changes):
10201
      raise errors.OpPrereqError("Please pass at least one modification",
10202
                                 errors.ECODE_INVAL)
10203

    
10204
  def ExpandNames(self):
10205
    # This raises errors.OpPrereqError on its own:
10206
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10207

    
10208
    self.needed_locks = {
10209
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10210
      }
10211

    
10212
  def CheckPrereq(self):
10213
    """Check prerequisites.
10214

10215
    """
10216
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10217

    
10218
    if self.group is None:
10219
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10220
                               (self.op.group_name, self.group_uuid))
10221

    
10222
    if self.op.ndparams:
10223
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10224
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10225
      self.new_ndparams = new_ndparams
10226

    
10227
  def BuildHooksEnv(self):
10228
    """Build hooks env.
10229

10230
    """
10231
    env = {
10232
      "GROUP_NAME": self.op.group_name,
10233
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
10234
      }
10235
    mn = self.cfg.GetMasterNode()
10236
    return env, [mn], [mn]
10237

    
10238
  def Exec(self, feedback_fn):
10239
    """Modifies the node group.
10240

10241
    """
10242
    result = []
10243

    
10244
    if self.op.ndparams:
10245
      self.group.ndparams = self.new_ndparams
10246
      result.append(("ndparams", str(self.group.ndparams)))
10247

    
10248
    if self.op.alloc_policy:
10249
      self.group.alloc_policy = self.op.alloc_policy
10250

    
10251
    self.cfg.Update(self.group, feedback_fn)
10252
    return result
10253

    
10254

    
10255

    
10256
class LURemoveGroup(LogicalUnit):
10257
  HPATH = "group-remove"
10258
  HTYPE = constants.HTYPE_GROUP
10259
  REQ_BGL = False
10260

    
10261
  def ExpandNames(self):
10262
    # This raises errors.OpPrereqError on its own:
10263
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10264
    self.needed_locks = {
10265
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10266
      }
10267

    
10268
  def CheckPrereq(self):
10269
    """Check prerequisites.
10270

10271
    This checks that the given group name exists as a node group, that it is
10272
    empty (i.e., contains no nodes), and that it is not the last group of the
10273
    cluster.
10274

10275
    """
10276
    # Verify that the group is empty.
10277
    group_nodes = [node.name
10278
                   for node in self.cfg.GetAllNodesInfo().values()
10279
                   if node.group == self.group_uuid]
10280

    
10281
    if group_nodes:
10282
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
10283
                                 " nodes: %s" %
10284
                                 (self.op.group_name,
10285
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
10286
                                 errors.ECODE_STATE)
10287

    
10288
    # Verify the cluster would not be left group-less.
10289
    if len(self.cfg.GetNodeGroupList()) == 1:
10290
      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10291
                                 " which cannot be left without at least one"
10292
                                 " group" % self.op.group_name,
10293
                                 errors.ECODE_STATE)
10294

    
10295
  def BuildHooksEnv(self):
10296
    """Build hooks env.
10297

10298
    """
10299
    env = {
10300
      "GROUP_NAME": self.op.group_name,
10301
      }
10302
    mn = self.cfg.GetMasterNode()
10303
    return env, [mn], [mn]
10304

    
10305
  def Exec(self, feedback_fn):
10306
    """Remove the node group.
10307

10308
    """
10309
    try:
10310
      self.cfg.RemoveNodeGroup(self.group_uuid)
10311
    except errors.ConfigurationError:
10312
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10313
                               (self.op.group_name, self.group_uuid))
10314

    
10315
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10316

    
10317

    
10318
class LURenameGroup(LogicalUnit):
10319
  HPATH = "group-rename"
10320
  HTYPE = constants.HTYPE_GROUP
10321
  REQ_BGL = False
10322

    
10323
  def ExpandNames(self):
10324
    # This raises errors.OpPrereqError on its own:
10325
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10326

    
10327
    self.needed_locks = {
10328
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10329
      }
10330

    
10331
  def CheckPrereq(self):
10332
    """Check prerequisites.
10333

10334
    This checks that the given old_name exists as a node group, and that
10335
    new_name doesn't.
10336

10337
    """
10338
    try:
10339
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10340
    except errors.OpPrereqError:
10341
      pass
10342
    else:
10343
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10344
                                 " node group (UUID: %s)" %
10345
                                 (self.op.new_name, new_name_uuid),
10346
                                 errors.ECODE_EXISTS)
10347

    
10348
  def BuildHooksEnv(self):
10349
    """Build hooks env.
10350

10351
    """
10352
    env = {
10353
      "OLD_NAME": self.op.old_name,
10354
      "NEW_NAME": self.op.new_name,
10355
      }
10356

    
10357
    mn = self.cfg.GetMasterNode()
10358
    all_nodes = self.cfg.GetAllNodesInfo()
10359
    run_nodes = [mn]
10360
    all_nodes.pop(mn, None)
10361

    
10362
    for node in all_nodes.values():
10363
      if node.group == self.group_uuid:
10364
        run_nodes.append(node.name)
10365

    
10366
    return env, run_nodes, run_nodes
10367

    
10368
  def Exec(self, feedback_fn):
10369
    """Rename the node group.
10370

10371
    """
10372
    group = self.cfg.GetNodeGroup(self.group_uuid)
10373

    
10374
    if group is None:
10375
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10376
                               (self.op.old_name, self.group_uuid))
10377

    
10378
    group.name = self.op.new_name
10379
    self.cfg.Update(group, feedback_fn)
10380

    
10381
    return self.op.new_name
10382

    
10383

    
10384
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10385
  """Generic tags LU.
10386

10387
  This is an abstract class which is the parent of all the other tags LUs.
10388

10389
  """
10390

    
10391
  def ExpandNames(self):
10392
    self.needed_locks = {}
10393
    if self.op.kind == constants.TAG_NODE:
10394
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10395
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10396
    elif self.op.kind == constants.TAG_INSTANCE:
10397
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10398
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10399

    
10400
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10401
    # not possible to acquire the BGL based on opcode parameters)
10402

    
10403
  def CheckPrereq(self):
10404
    """Check prerequisites.
10405

10406
    """
10407
    if self.op.kind == constants.TAG_CLUSTER:
10408
      self.target = self.cfg.GetClusterInfo()
10409
    elif self.op.kind == constants.TAG_NODE:
10410
      self.target = self.cfg.GetNodeInfo(self.op.name)
10411
    elif self.op.kind == constants.TAG_INSTANCE:
10412
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10413
    else:
10414
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10415
                                 str(self.op.kind), errors.ECODE_INVAL)
10416

    
10417

    
10418
class LUGetTags(TagsLU):
10419
  """Returns the tags of a given object.
10420

10421
  """
10422
  REQ_BGL = False
10423

    
10424
  def ExpandNames(self):
10425
    TagsLU.ExpandNames(self)
10426

    
10427
    # Share locks as this is only a read operation
10428
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10429

    
10430
  def Exec(self, feedback_fn):
10431
    """Returns the tag list.
10432

10433
    """
10434
    return list(self.target.GetTags())
10435

    
10436

    
10437
class LUSearchTags(NoHooksLU):
10438
  """Searches the tags for a given pattern.
10439

10440
  """
10441
  REQ_BGL = False
10442

    
10443
  def ExpandNames(self):
10444
    self.needed_locks = {}
10445

    
10446
  def CheckPrereq(self):
10447
    """Check prerequisites.
10448

10449
    This checks the pattern passed for validity by compiling it.
10450

10451
    """
10452
    try:
10453
      self.re = re.compile(self.op.pattern)
10454
    except re.error, err:
10455
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10456
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10457

    
10458
  def Exec(self, feedback_fn):
10459
    """Returns the tag list.
10460

10461
    """
10462
    cfg = self.cfg
10463
    tgts = [("/cluster", cfg.GetClusterInfo())]
10464
    ilist = cfg.GetAllInstancesInfo().values()
10465
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10466
    nlist = cfg.GetAllNodesInfo().values()
10467
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10468
    results = []
10469
    for path, target in tgts:
10470
      for tag in target.GetTags():
10471
        if self.re.search(tag):
10472
          results.append((path, tag))
10473
    return results
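
# A minimal sketch (hypothetical names and tags) of what LUSearchTags.Exec
# returns for the pattern "web": a list of (path, matching tag) pairs such as
#
#   [("/cluster", "webfarm"),
#    ("/instances/inst1.example.com", "webserver"),
#    ("/nodes/node1.example.com", "web-capable")]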
10474

    
10475

    
10476
class LUAddTags(TagsLU):
10477
  """Sets a tag on a given object.
10478

10479
  """
10480
  REQ_BGL = False
10481

    
10482
  def CheckPrereq(self):
10483
    """Check prerequisites.
10484

10485
    This checks the type and length of the tag name and value.
10486

10487
    """
10488
    TagsLU.CheckPrereq(self)
10489
    for tag in self.op.tags:
10490
      objects.TaggableObject.ValidateTag(tag)
10491

    
10492
  def Exec(self, feedback_fn):
10493
    """Sets the tag.
10494

10495
    """
10496
    try:
10497
      for tag in self.op.tags:
10498
        self.target.AddTag(tag)
10499
    except errors.TagError, err:
10500
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10501
    self.cfg.Update(self.target, feedback_fn)
10502

    
10503

    
10504
class LUDelTags(TagsLU):
10505
  """Delete a list of tags from a given object.
10506

10507
  """
10508
  REQ_BGL = False
10509

    
10510
  def CheckPrereq(self):
10511
    """Check prerequisites.
10512

10513
    This checks that we have the given tag.
10514

10515
    """
10516
    TagsLU.CheckPrereq(self)
10517
    for tag in self.op.tags:
10518
      objects.TaggableObject.ValidateTag(tag)
10519
    del_tags = frozenset(self.op.tags)
10520
    cur_tags = self.target.GetTags()
10521

    
10522
    diff_tags = del_tags - cur_tags
10523
    if diff_tags:
10524
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10525
      raise errors.OpPrereqError("Tag(s) %s not found" %
10526
                                 (utils.CommaJoin(diff_names), ),
10527
                                 errors.ECODE_NOENT)
10528

    
10529
  def Exec(self, feedback_fn):
10530
    """Remove the tag from the object.
10531

10532
    """
10533
    for tag in self.op.tags:
10534
      self.target.RemoveTag(tag)
10535
    self.cfg.Update(self.target, feedback_fn)
10536

    
10537

    
10538
class LUTestDelay(NoHooksLU):
10539
  """Sleep for a specified amount of time.
10540

10541
  This LU sleeps on the master and/or nodes for a specified amount of
10542
  time.
10543

10544
  """
10545
  REQ_BGL = False
10546

    
10547
  def ExpandNames(self):
10548
    """Expand names and set required locks.
10549

10550
    This expands the node list, if any.
10551

10552
    """
10553
    self.needed_locks = {}
10554
    if self.op.on_nodes:
10555
      # _GetWantedNodes can be used here, but is not always appropriate to use
10556
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10557
      # more information.
10558
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10559
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10560

    
10561
  def _TestDelay(self):
10562
    """Do the actual sleep.
10563

10564
    """
10565
    if self.op.on_master:
10566
      if not utils.TestDelay(self.op.duration):
10567
        raise errors.OpExecError("Error during master delay test")
10568
    if self.op.on_nodes:
10569
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10570
      for node, node_result in result.items():
10571
        node_result.Raise("Failure during rpc call to node %s" % node)
10572

    
10573
  def Exec(self, feedback_fn):
10574
    """Execute the test delay opcode, with the wanted repetitions.
10575

10576
    """
10577
    if self.op.repeat == 0:
10578
      self._TestDelay()
10579
    else:
10580
      top_value = self.op.repeat - 1
10581
      for i in range(self.op.repeat):
10582
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10583
        self._TestDelay()
10584

    
10585

    
10586
class LUTestJobqueue(NoHooksLU):
10587
  """Utility LU to test some aspects of the job queue.
10588

10589
  """
10590
  REQ_BGL = False
10591

    
10592
  # Must be lower than default timeout for WaitForJobChange to see whether it
10593
  # notices changed jobs
10594
  _CLIENT_CONNECT_TIMEOUT = 20.0
10595
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10596

    
10597
  @classmethod
10598
  def _NotifyUsingSocket(cls, cb, errcls):
10599
    """Opens a Unix socket and waits for another program to connect.
10600

10601
    @type cb: callable
10602
    @param cb: Callback to send socket name to client
10603
    @type errcls: class
10604
    @param errcls: Exception class to use for errors
10605

10606
    """
10607
    # Using a temporary directory as there's no easy way to create temporary
10608
    # sockets without writing a custom loop around tempfile.mktemp and
10609
    # socket.bind
10610
    tmpdir = tempfile.mkdtemp()
10611
    try:
10612
      tmpsock = utils.PathJoin(tmpdir, "sock")
10613

    
10614
      logging.debug("Creating temporary socket at %s", tmpsock)
10615
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10616
      try:
10617
        sock.bind(tmpsock)
10618
        sock.listen(1)
10619

    
10620
        # Send details to client
10621
        cb(tmpsock)
10622

    
10623
        # Wait for client to connect before continuing
10624
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10625
        try:
10626
          (conn, _) = sock.accept()
10627
        except socket.error, err:
10628
          raise errcls("Client didn't connect in time (%s)" % err)
10629
      finally:
10630
        sock.close()
10631
    finally:
10632
      # Remove as soon as client is connected
10633
      shutil.rmtree(tmpdir)
10634

    
10635
    # Wait for client to close
10636
    try:
10637
      try:
10638
        # pylint: disable-msg=E1101
10639
        # Instance of '_socketobject' has no ... member
10640
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10641
        conn.recv(1)
10642
      except socket.error, err:
10643
        raise errcls("Client failed to confirm notification (%s)" % err)
10644
    finally:
10645
      conn.close()
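
  # A minimal sketch (an assumption, not part of this LU) of the client side
  # of the notification protocol above: connect to the announced socket path,
  # then send a byte (or simply close the connection) to confirm:
  #
  #   import socket
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   sock.connect(announced_path)  # path announced via ELOG_JQUEUE_TEST
  #   sock.send("x")                # confirms the notification
  #   sock.close()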
10646

    
10647
  def _SendNotification(self, test, arg, sockname):
10648
    """Sends a notification to the client.
10649

10650
    @type test: string
10651
    @param test: Test name
10652
    @param arg: Test argument (depends on test)
10653
    @type sockname: string
10654
    @param sockname: Socket path
10655

10656
    """
10657
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10658

    
10659
  def _Notify(self, prereq, test, arg):
10660
    """Notifies the client of a test.
10661

10662
    @type prereq: bool
10663
    @param prereq: Whether this is a prereq-phase test
10664
    @type test: string
10665
    @param test: Test name
10666
    @param arg: Test argument (depends on test)
10667

10668
    """
10669
    if prereq:
10670
      errcls = errors.OpPrereqError
10671
    else:
10672
      errcls = errors.OpExecError
10673

    
10674
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10675
                                                  test, arg),
10676
                                   errcls)
10677

    
10678
  def CheckArguments(self):
10679
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10680
    self.expandnames_calls = 0
10681

    
10682
  def ExpandNames(self):
10683
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10684
    if checkargs_calls < 1:
10685
      raise errors.ProgrammerError("CheckArguments was not called")
10686

    
10687
    self.expandnames_calls += 1
10688

    
10689
    if self.op.notify_waitlock:
10690
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10691

    
10692
    self.LogInfo("Expanding names")
10693

    
10694
    # Get lock on master node (just to get a lock, not for a particular reason)
10695
    self.needed_locks = {
10696
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10697
      }
10698

    
10699
  def Exec(self, feedback_fn):
10700
    if self.expandnames_calls < 1:
10701
      raise errors.ProgrammerError("ExpandNames was not called")
10702

    
10703
    if self.op.notify_exec:
10704
      self._Notify(False, constants.JQT_EXEC, None)
10705

    
10706
    self.LogInfo("Executing")
10707

    
10708
    if self.op.log_messages:
10709
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10710
      for idx, msg in enumerate(self.op.log_messages):
10711
        self.LogInfo("Sending log message %s", idx + 1)
10712
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10713
        # Report how many test messages have been sent
10714
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10715

    
10716
    if self.op.fail:
10717
      raise errors.OpExecError("Opcode failure was requested")
10718

    
10719
    return True
10720

    
10721

    
10722
class IAllocator(object):
10723
  """IAllocator framework.
10724

10725
  An IAllocator instance has several sets of attributes:
10726
    - cfg that is needed to query the cluster
10727
    - input data (all members of the _KEYS class attribute are required)
10728
    - four buffer attributes (in|out_data|text), that represent the
10729
      input (to the external script) in text and data structure format,
10730
      and the output from it, again in two formats
10731
    - the result variables from the script (success, info, result) for
10732
      easy usage
10733

10734
  """
10735
  # pylint: disable-msg=R0902
10736
  # lots of instance attributes
10737
  _ALLO_KEYS = [
10738
    "name", "mem_size", "disks", "disk_template",
10739
    "os", "tags", "nics", "vcpus", "hypervisor",
10740
    ]
10741
  _RELO_KEYS = [
10742
    "name", "relocate_from",
10743
    ]
10744
  _EVAC_KEYS = [
10745
    "evac_nodes",
10746
    ]
10747

    
10748
  def __init__(self, cfg, rpc, mode, **kwargs):
10749
    self.cfg = cfg
10750
    self.rpc = rpc
10751
    # init buffer variables
10752
    self.in_text = self.out_text = self.in_data = self.out_data = None
10753
    # init all input fields so that pylint is happy
10754
    self.mode = mode
10755
    self.mem_size = self.disks = self.disk_template = None
10756
    self.os = self.tags = self.nics = self.vcpus = None
10757
    self.hypervisor = None
10758
    self.relocate_from = None
10759
    self.name = None
10760
    self.evac_nodes = None
10761
    # computed fields
10762
    self.required_nodes = None
10763
    # init result fields
10764
    self.success = self.info = self.result = None
10765
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10766
      keyset = self._ALLO_KEYS
10767
      fn = self._AddNewInstance
10768
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10769
      keyset = self._RELO_KEYS
10770
      fn = self._AddRelocateInstance
10771
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10772
      keyset = self._EVAC_KEYS
10773
      fn = self._AddEvacuateNodes
10774
    else:
10775
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10776
                                   " IAllocator" % self.mode)
10777
    for key in kwargs:
10778
      if key not in keyset:
10779
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10780
                                     " IAllocator" % key)
10781
      setattr(self, key, kwargs[key])
10782

    
10783
    for key in keyset:
10784
      if key not in kwargs:
10785
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10786
                                     " IAllocator" % key)
10787
    self._BuildInputData(fn)
10788

    
10789
  def _ComputeClusterData(self):
10790
    """Compute the generic allocator input data.
10791

10792
    This is the data that is independent of the actual operation.
10793

10794
    """
10795
    cfg = self.cfg
10796
    cluster_info = cfg.GetClusterInfo()
10797
    # cluster data
10798
    data = {
10799
      "version": constants.IALLOCATOR_VERSION,
10800
      "cluster_name": cfg.GetClusterName(),
10801
      "cluster_tags": list(cluster_info.GetTags()),
10802
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10803
      # we don't have job IDs
10804
      }
10805
    ninfo = cfg.GetAllNodesInfo()
10806
    iinfo = cfg.GetAllInstancesInfo().values()
10807
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10808

    
10809
    # node data
10810
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
10811

    
10812
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10813
      hypervisor_name = self.hypervisor
10814
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10815
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10816
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10817
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10818

    
10819
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10820
                                        hypervisor_name)
10821
    node_iinfo = \
10822
      self.rpc.call_all_instances_info(node_list,
10823
                                       cluster_info.enabled_hypervisors)
10824

    
10825
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10826

    
10827
    config_ndata = self._ComputeBasicNodeData(ninfo)
10828
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10829
                                                 i_list, config_ndata)
10830
    assert len(data["nodes"]) == len(ninfo), \
10831
        "Incomplete node data computed"
10832

    
10833
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10834

    
10835
    self.in_data = data
10836

    
10837
  @staticmethod
10838
  def _ComputeNodeGroupData(cfg):
10839
    """Compute node groups data.
10840

10841
    """
10842
    ng = {}
10843
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10844
      ng[guuid] = {
10845
        "name": gdata.name,
10846
        "alloc_policy": gdata.alloc_policy,
10847
        }
10848
    return ng
10849

    
10850
  @staticmethod
10851
  def _ComputeBasicNodeData(node_cfg):
10852
    """Compute global node data.
10853

10854
    @rtype: dict
10855
    @return: a dict mapping node names to dicts of config-derived attributes
10856

10857
    """
10858
    node_results = {}
10859
    for ninfo in node_cfg.values():
10860
      # fill in static (config-based) values
10861
      pnr = {
10862
        "tags": list(ninfo.GetTags()),
10863
        "primary_ip": ninfo.primary_ip,
10864
        "secondary_ip": ninfo.secondary_ip,
10865
        "offline": ninfo.offline,
10866
        "drained": ninfo.drained,
10867
        "master_candidate": ninfo.master_candidate,
10868
        "group": ninfo.group,
10869
        "master_capable": ninfo.master_capable,
10870
        "vm_capable": ninfo.vm_capable,
10871
        }
10872

    
10873
      node_results[ninfo.name] = pnr
10874

    
10875
    return node_results
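
  # A minimal sketch (hypothetical values) of one entry produced above; each
  # node maps to a plain dict of config-derived attributes:
  #
  #   node_results["node1.example.com"] = {
  #     "tags": [],
  #     "primary_ip": "192.0.2.10",
  #     "secondary_ip": "198.51.100.10",
  #     "offline": False,
  #     "drained": False,
  #     "master_candidate": True,
  #     "group": "uuid-of-node-group",
  #     "master_capable": True,
  #     "vm_capable": True,
  #   }
  #
  # _ComputeDynamicNodeData() below merges the live (RPC-derived) values into
  # a copy of this structure.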
10876

    
10877
  @staticmethod
10878
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
10879
                              node_results):
10880
    """Compute global node data.
10881

10882
    @param node_results: the basic node structures as filled from the config
10883

10884
    """
10885
    # make a copy of the current dict
10886
    node_results = dict(node_results)
10887
    for nname, nresult in node_data.items():
10888
      assert nname in node_results, "Missing basic data for node %s" % nname
10889
      ninfo = node_cfg[nname]
10890

    
10891
      if not (ninfo.offline or ninfo.drained):
10892
        nresult.Raise("Can't get data for node %s" % nname)
10893
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10894
                                nname)
10895
        remote_info = nresult.payload
10896

    
10897
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10898
                     'vg_size', 'vg_free', 'cpu_total']:
10899
          if attr not in remote_info:
10900
            raise errors.OpExecError("Node '%s' didn't return attribute"
10901
                                     " '%s'" % (nname, attr))
10902
          if not isinstance(remote_info[attr], int):
10903
            raise errors.OpExecError("Node '%s' returned invalid value"
10904
                                     " for '%s': %s" %
10905
                                     (nname, attr, remote_info[attr]))
10906
        # compute memory used by primary instances
10907
        i_p_mem = i_p_up_mem = 0
10908
        for iinfo, beinfo in i_list:
10909
          if iinfo.primary_node == nname:
10910
            i_p_mem += beinfo[constants.BE_MEMORY]
10911
            if iinfo.name not in node_iinfo[nname].payload:
10912
              i_used_mem = 0
10913
            else:
10914
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10915
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10916
            remote_info['memory_free'] -= max(0, i_mem_diff)
10917

    
10918
            if iinfo.admin_up:
10919
              i_p_up_mem += beinfo[constants.BE_MEMORY]
10920

    
10921
        # compute memory used by instances
10922
        pnr_dyn = {
10923
          "total_memory": remote_info['memory_total'],
10924
          "reserved_memory": remote_info['memory_dom0'],
10925
          "free_memory": remote_info['memory_free'],
10926
          "total_disk": remote_info['vg_size'],
10927
          "free_disk": remote_info['vg_free'],
10928
          "total_cpus": remote_info['cpu_total'],
10929
          "i_pri_memory": i_p_mem,
10930
          "i_pri_up_memory": i_p_up_mem,
10931
          }
10932
        pnr_dyn.update(node_results[nname])
10933

    
10934
      node_results[nname] = pnr_dyn
10935

    
10936
    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
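
  # Example (illustrative sketch; names and values are invented): one entry
  # of instance_data for a single-disk, single-NIC instance:
  #
  #   "inst1.example.com": {
  #     "tags": [], "admin_up": True, "vcpus": 2, "memory": 1024,
  #     "os": "debootstrap+default", "nodes": ["node1.example.com"],
  #     "nics": [{"mac": "aa:00:00:00:00:01", "ip": None,
  #               "mode": "bridged", "link": "xen-br0",
  #               "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "plain", "hypervisor": "xen-pvm",
  #     "disk_space_total": 10240,
  #   }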

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
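
  # Example (illustrative sketch; values are invented): a new-instance
  # allocation request as built above, for a two-node DRBD instance:
  #
  #   {
  #     "name": "inst2.example.com", "disk_template": "drbd", "tags": [],
  #     "os": "debootstrap+default", "vcpus": 1, "memory": 512,
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_space_total": 10368, "nics": [...], "required_nodes": 2,
  #   }
  #
  # The "type" key is added later by _BuildInputData.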

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance doesn't have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
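
  # Example (illustrative sketch; values are invented): a relocation request
  # asking the allocator for one replacement secondary node:
  #
  #   {
  #     "name": "inst2.example.com", "disk_space_total": 10368,
  #     "required_nodes": 1, "relocate_from": ["node2.example.com"],
  #   }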

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request
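
  # Example (illustrative sketch; node name invented): a multi-evacuation
  # request only carries the list of nodes to be evacuated:
  #
  #   {"evac_nodes": ["node3.example.com"]}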

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
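
  # Example (illustrative sketch; keys abridged, values invented): the
  # serialized text handed to the allocator script is the cluster description
  # built by _ComputeClusterData with the mode-specific request merged in:
  #
  #   {
  #     "cluster_name": "cluster.example.com",
  #     "nodes": {...}, "instances": {...},
  #     "request": {"type": "allocate", ...},
  #   }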

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
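
  # Example (illustrative sketch): a minimal allocator reply that passes the
  # checks above -- "success", "info" and "result" must be present and
  # "result" must be a list:
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}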


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
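
  # Note: with direction IALLOCATOR_DIR_IN the LU only returns the serialized
  # request that would be fed to the allocator; with IALLOCATOR_DIR_OUT it
  # runs the named allocator script and returns its raw, unvalidated output.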


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
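
# Example (illustrative sketch): looking up a query implementation for a
# known resource type; unknown names raise OpPrereqError with ECODE_INVAL.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery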