lib/cmdlib.py @ 25ce3ec4
1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60

    
61
import ganeti.masterd.instance # pylint: disable-msg=W0611
62

    
63

    
64
def _SupportsOob(cfg, node):
65
  """Tells if node supports OOB.
66

67
  @type cfg: L{config.ConfigWriter}
68
  @param cfg: The cluster configuration
69
  @type node: L{objects.Node}
70
  @param node: The node
71
  @return: The OOB script if supported or an empty string otherwise
72

73
  """
74
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
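# Illustrative usage (a sketch; the node name is an assumption):
#   node = self.cfg.GetNodeInfo("node1.example.com")
#   oob_program = _SupportsOob(self.cfg, node)
#   if oob_program:
#     # the node has an OOB helper configured and can be power-managed
#     ...
# An empty value for the ND_OOB_PROGRAM ndparam means OOB is not supported.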
75

    
76

    
77
# End types
78
class LogicalUnit(object):
79
  """Logical Unit base class.
80

81
  Subclasses must follow these rules:
82
    - implement ExpandNames
83
    - implement CheckPrereq (except when tasklets are used)
84
    - implement Exec (except when tasklets are used)
85
    - implement BuildHooksEnv
86
    - redefine HPATH and HTYPE
87
    - optionally redefine their run requirements:
88
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
89

90
  Note that all commands require root permissions.
91

92
  @ivar dry_run_result: the value (if any) that will be returned to the caller
93
      in dry-run mode (signalled by opcode dry_run parameter)
94

95
  """
96
  HPATH = None
97
  HTYPE = None
98
  REQ_BGL = True
99

    
100
  def __init__(self, processor, op, context, rpc):
101
    """Constructor for LogicalUnit.
102

103
    This needs to be overridden in derived classes in order to check op
104
    validity.
105

106
    """
107
    self.proc = processor
108
    self.op = op
109
    self.cfg = context.cfg
110
    self.context = context
111
    self.rpc = rpc
112
    # Dicts used to declare locking needs to mcpu
113
    self.needed_locks = None
114
    self.acquired_locks = {}
115
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
116
    self.add_locks = {}
117
    self.remove_locks = {}
118
    # Used to force good behavior when calling helper functions
119
    self.recalculate_locks = {}
120
    self.__ssh = None
121
    # logging
122
    self.Log = processor.Log # pylint: disable-msg=C0103
123
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
124
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
125
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
126
    # support for dry-run
127
    self.dry_run_result = None
128
    # support for generic debug attribute
129
    if (not hasattr(self.op, "debug_level") or
130
        not isinstance(self.op.debug_level, int)):
131
      self.op.debug_level = 0
132

    
133
    # Tasklets
134
    self.tasklets = None
135

    
136
    # Validate opcode parameters and set defaults
137
    self.op.Validate(True)
138

    
139
    self.CheckArguments()
140

    
141
  def __GetSSH(self):
142
    """Returns the SshRunner object
143

144
    """
145
    if not self.__ssh:
146
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
147
    return self.__ssh
148

    
149
  ssh = property(fget=__GetSSH)
150

    
151
  def CheckArguments(self):
152
    """Check syntactic validity for the opcode arguments.
153

154
    This method is for doing a simple syntactic check and ensuring the
155
    validity of opcode parameters, without any cluster-related
156
    checks. While the same can be accomplished in ExpandNames and/or
157
    CheckPrereq, doing these separately is better because:
158

159
      - ExpandNames is left as purely a lock-related function
160
      - CheckPrereq is run after we have acquired locks (and possibly
161
        waited for them)
162

163
    The function is allowed to change the self.op attribute so that
164
    later methods no longer need to worry about missing parameters.
165

166
    """
167
    pass
168

    
169
  def ExpandNames(self):
170
    """Expand names for this LU.
171

172
    This method is called before starting to execute the opcode, and it should
173
    update all the parameters of the opcode to their canonical form (e.g. a
174
    short node name must be fully expanded after this method has successfully
175
    completed). This way locking, hooks, logging, etc. can work correctly.
176

177
    LUs which implement this method must also populate the self.needed_locks
178
    member, as a dict with lock levels as keys, and a list of needed lock names
179
    as values. Rules:
180

181
      - use an empty dict if you don't need any lock
182
      - if you don't need any lock at a particular level omit that level
183
      - don't put anything for the BGL level
184
      - if you want all locks at a level use locking.ALL_SET as a value
185

186
    If you need to share locks (rather than acquire them exclusively) at one
187
    level you can modify self.share_locks, setting a true value (usually 1) for
188
    that level. By default locks are not shared.
189

190
    This function can also define a list of tasklets, which then will be
191
    executed in order instead of the usual LU-level CheckPrereq and Exec
192
    functions, if those are not defined by the LU.
193

194
    Examples::
195

196
      # Acquire all nodes and one instance
197
      self.needed_locks = {
198
        locking.LEVEL_NODE: locking.ALL_SET,
199
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
200
      }
201
      # Acquire just two nodes
202
      self.needed_locks = {
203
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
204
      }
205
      # Acquire no locks
206
      self.needed_locks = {} # No, you can't leave it to the default value None
207

208
    """
209
    # The implementation of this method is mandatory only if the new LU is
210
    # concurrent, so that old LUs don't need to be changed all at the same
211
    # time.
212
    if self.REQ_BGL:
213
      self.needed_locks = {} # Exclusive LUs don't need locks.
214
    else:
215
      raise NotImplementedError
216

    
217
  def DeclareLocks(self, level):
218
    """Declare LU locking needs for a level
219

220
    While most LUs can just declare their locking needs at ExpandNames time,
221
    sometimes there's the need to calculate some locks after having acquired
222
    the ones before. This function is called just before acquiring locks at a
223
    particular level, but after acquiring the ones at lower levels, and permits
224
    such calculations. It can be used to modify self.needed_locks, and by
225
    default it does nothing.
226

227
    This function is only called if you have something already set in
228
    self.needed_locks for the level.
229

230
    @param level: Locking level which is going to be locked
231
    @type level: member of ganeti.locking.LEVELS
232

233
    """
234

    
235
  def CheckPrereq(self):
236
    """Check prerequisites for this LU.
237

238
    This method should check that the prerequisites for the execution
239
    of this LU are fulfilled. It can do internode communication, but
240
    it should be idempotent - no cluster or system changes are
241
    allowed.
242

243
    The method should raise errors.OpPrereqError in case something is
244
    not fulfilled. Its return value is ignored.
245

246
    This method should also update all the parameters of the opcode to
247
    their canonical form if it hasn't been done by ExpandNames before.
248

249
    """
250
    if self.tasklets is not None:
251
      for (idx, tl) in enumerate(self.tasklets):
252
        logging.debug("Checking prerequisites for tasklet %s/%s",
253
                      idx + 1, len(self.tasklets))
254
        tl.CheckPrereq()
255
    else:
256
      pass
257

    
258
  def Exec(self, feedback_fn):
259
    """Execute the LU.
260

261
    This method should implement the actual work. It should raise
262
    errors.OpExecError for failures that are somewhat dealt with in
263
    code, or expected.
264

265
    """
266
    if self.tasklets is not None:
267
      for (idx, tl) in enumerate(self.tasklets):
268
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
269
        tl.Exec(feedback_fn)
270
    else:
271
      raise NotImplementedError
272

    
273
  def BuildHooksEnv(self):
274
    """Build hooks environment for this LU.
275

276
    This method should return a three-element tuple consisting of: a dict
277
    containing the environment that will be used for running the
278
    specific hook for this LU, a list of node names on which the hook
279
    should run before the execution, and a list of node names on which
280
    the hook should run after the execution.
281

282
    The keys of the dict must not be prefixed with 'GANETI_', as this will
283
    be handled in the hooks runner. Also note additional keys will be
284
    added by the hooks runner. If the LU doesn't define any
285
    environment, an empty dict (and not None) should be returned.
286

287
    If there are no nodes, an empty list (and not None) should be returned.
288

289
    Note that if the HPATH for a LU class is None, this function will
290
    not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
296
    """Notify the LU about the results of its hooks.
297

298
    This method is called every time a hooks phase is executed, and notifies
299
    the Logical Unit about the hooks' result. The LU can then use it to alter
300
    its result based on the hooks.  By default the method does nothing and the
301
    previous result is passed back unchanged but any LU can define it if it
302
    wants to use the local cluster hook-scripts somehow.
303

304
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
305
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
306
    @param hook_results: the results of the multi-node hooks rpc call
307
    @param feedback_fn: function used to send feedback back to the caller
308
    @param lu_result: the previous Exec result this LU had, or None
309
        in the PRE phase
310
    @return: the new Exec result, based on the previous result
311
        and hook results
312

313
    """
314
    # API must be kept, thus we ignore the "unused argument" and
315
    # "could be a function" pylint warnings
316
    # pylint: disable-msg=W0613,R0201
317
    return lu_result
318

    
319
  def _ExpandAndLockInstance(self):
320
    """Helper function to expand and lock an instance.
321

322
    Many LUs that work on an instance take its name in self.op.instance_name
323
    and need to expand it and then declare the expanded name for locking. This
324
    function does it, and then updates self.op.instance_name to the expanded
325
    name. It also initializes needed_locks as a dict, if this hasn't been done
326
    before.
327

328
    """
329
    if self.needed_locks is None:
330
      self.needed_locks = {}
331
    else:
332
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
333
        "_ExpandAndLockInstance called with instance-level locks set"
334
    self.op.instance_name = _ExpandInstanceName(self.cfg,
335
                                                self.op.instance_name)
336
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
337

    
338
  def _LockInstancesNodes(self, primary_only=False):
339
    """Helper function to declare instances' nodes for locking.
340

341
    This function should be called after locking one or more instances to lock
342
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
343
    with all primary or secondary nodes for instances already locked and
344
    present in self.needed_locks[locking.LEVEL_INSTANCE].
345

346
    It should be called from DeclareLocks, and for safety only works if
347
    self.recalculate_locks[locking.LEVEL_NODE] is set.
348

349
    In the future it may grow parameters to just lock some instance's nodes, or
350
    to just lock primaries or secondary nodes, if needed.
351

352
    It should be called in DeclareLocks in a way similar to::
353

354
      if level == locking.LEVEL_NODE:
355
        self._LockInstancesNodes()
356

357
    @type primary_only: boolean
358
    @param primary_only: only lock primary nodes of locked instances
359

360
    """
361
    assert locking.LEVEL_NODE in self.recalculate_locks, \
362
      "_LockInstancesNodes helper function called with no nodes to recalculate"
363

    
364
    # TODO: check if we've really been called with the instance locks held
365

    
366
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
367
    # future we might want to have different behaviors depending on the value
368
    # of self.recalculate_locks[locking.LEVEL_NODE]
369
    wanted_nodes = []
370
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
371
      instance = self.context.cfg.GetInstanceInfo(instance_name)
372
      wanted_nodes.append(instance.primary_node)
373
      if not primary_only:
374
        wanted_nodes.extend(instance.secondary_nodes)
375

    
376
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
377
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
378
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
379
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
380

    
381
    del self.recalculate_locks[locking.LEVEL_NODE]
382

    
383

    
384
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
385
  """Simple LU which runs no hooks.
386

387
  This LU is intended as a parent for other LogicalUnits which will
388
  run no hooks, in order to reduce duplicate code.
389

390
  """
391
  HPATH = None
392
  HTYPE = None
393

    
394
  def BuildHooksEnv(self):
395
    """Empty BuildHooksEnv for NoHooksLu.
396

397
    This just raises an error.
398

399
    """
400
    assert False, "BuildHooksEnv called for NoHooksLUs"
401

    
402

    
403
class Tasklet:
404
  """Tasklet base class.
405

406
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
407
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
408
  tasklets know nothing about locks.
409

410
  Subclasses must follow these rules:
411
    - Implement CheckPrereq
412
    - Implement Exec
413

414
  """
415
  def __init__(self, lu):
416
    self.lu = lu
417

    
418
    # Shortcuts
419
    self.cfg = lu.cfg
420
    self.rpc = lu.rpc
421

    
422
  def CheckPrereq(self):
423
    """Check prerequisites for this tasklets.
424

425
    This method should check whether the prerequisites for the execution of
426
    this tasklet are fulfilled. It can do internode communication, but it
427
    should be idempotent - no cluster or system changes are allowed.
428

429
    The method should raise errors.OpPrereqError in case something is not
430
    fulfilled. Its return value is ignored.
431

432
    This method should also update all parameters to their canonical form if it
433
    hasn't been done before.
434

435
    """
436
    pass
437

    
438
  def Exec(self, feedback_fn):
439
    """Execute the tasklet.
440

441
    This method should implement the actual work. It should raise
442
    errors.OpExecError for failures that are somewhat dealt with in code, or
443
    expected.
444

445
    """
446
    raise NotImplementedError
447

    
448

    
449
class _QueryBase:
450
  """Base for query utility classes.
451

452
  """
453
  #: Attribute holding field definitions
454
  FIELDS = None
455

    
456
  def __init__(self, names, fields, use_locking):
457
    """Initializes this class.
458

459
    """
460
    self.names = names
461
    self.use_locking = use_locking
462

    
463
    self.query = query.Query(self.FIELDS, fields)
464
    self.requested_data = self.query.RequestedData()
465

    
466
    self.do_locking = None
467
    self.wanted = None
468

    
469
  def _GetNames(self, lu, all_names, lock_level):
470
    """Helper function to determine names asked for in the query.
471

472
    """
473
    if self.do_locking:
474
      names = lu.acquired_locks[lock_level]
475
    else:
476
      names = all_names
477

    
478
    if self.wanted == locking.ALL_SET:
479
      assert not self.names
480
      # caller didn't specify names, so ordering is not important
481
      return utils.NiceSort(names)
482

    
483
    # caller specified names and we must keep the same order
484
    assert self.names
485
    assert not self.do_locking or lu.acquired_locks[lock_level]
486

    
487
    missing = set(self.wanted).difference(names)
488
    if missing:
489
      raise errors.OpExecError("Some items were removed before retrieving"
490
                               " their data: %s" % missing)
491

    
492
    # Return expanded names
493
    return self.wanted
494

    
495
  @classmethod
496
  def FieldsQuery(cls, fields):
497
    """Returns list of available fields.
498

499
    @return: List of L{objects.QueryFieldDefinition}
500

501
    """
502
    return query.QueryFields(cls.FIELDS, fields)
503

    
504
  def ExpandNames(self, lu):
505
    """Expand names for this query.
506

507
    See L{LogicalUnit.ExpandNames}.
508

509
    """
510
    raise NotImplementedError()
511

    
512
  def DeclareLocks(self, lu, level):
513
    """Declare locks for this query.
514

515
    See L{LogicalUnit.DeclareLocks}.
516

517
    """
518
    raise NotImplementedError()
519

    
520
  def _GetQueryData(self, lu):
521
    """Collects all data for this query.
522

523
    @return: Query data object
524

525
    """
526
    raise NotImplementedError()
527

    
528
  def NewStyleQuery(self, lu):
529
    """Collect data and execute query.
530

531
    """
532
    return query.GetQueryResponse(self.query, self._GetQueryData(lu))
533

    
534
  def OldStyleQuery(self, lu):
535
    """Collect data and execute query.
536

537
    """
538
    return self.query.OldStyleQuery(self._GetQueryData(lu))
539

    
540

    
541
def _GetWantedNodes(lu, nodes):
542
  """Returns list of checked and expanded node names.
543

544
  @type lu: L{LogicalUnit}
545
  @param lu: the logical unit on whose behalf we execute
546
  @type nodes: list
547
  @param nodes: list of node names or None for all nodes
548
  @rtype: list
549
  @return: the list of nodes, sorted
550
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
551

552
  """
553
  if nodes:
554
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
555

    
556
  return utils.NiceSort(lu.cfg.GetNodeList())
557

    
558

    
559
def _GetWantedInstances(lu, instances):
560
  """Returns list of checked and expanded instance names.
561

562
  @type lu: L{LogicalUnit}
563
  @param lu: the logical unit on whose behalf we execute
564
  @type instances: list
565
  @param instances: list of instance names or None for all instances
566
  @rtype: list
567
  @return: the list of instances, sorted
568
  @raise errors.OpPrereqError: if the instances parameter is wrong type
569
  @raise errors.OpPrereqError: if any of the passed instances is not found
570

571
  """
572
  if instances:
573
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
574
  else:
575
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
576
  return wanted
577

    
578

    
579
def _GetUpdatedParams(old_params, update_dict,
580
                      use_default=True, use_none=False):
581
  """Return the new version of a parameter dictionary.
582

583
  @type old_params: dict
584
  @param old_params: old parameters
585
  @type update_dict: dict
586
  @param update_dict: dict containing new parameter values, or
587
      constants.VALUE_DEFAULT to reset the parameter to its default
588
      value
589
  @param use_default: boolean
590
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
591
      values as 'to be deleted' values
592
  @param use_none: boolean
593
  @type use_none: whether to recognise C{None} values as 'to be
594
      deleted' values
595
  @rtype: dict
596
  @return: the new parameter dictionary
597

598
  """
599
  params_copy = copy.deepcopy(old_params)
600
  for key, val in update_dict.iteritems():
601
    if ((use_default and val == constants.VALUE_DEFAULT) or
602
        (use_none and val is None)):
603
      try:
604
        del params_copy[key]
605
      except KeyError:
606
        pass
607
    else:
608
      params_copy[key] = val
609
  return params_copy
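# Illustrative example of the merge semantics (parameter names are made up):
#   _GetUpdatedParams({"memory": 128, "vcpus": 2},
#                     {"memory": constants.VALUE_DEFAULT, "vcpus": 4})
#   -> {"vcpus": 4}    # VALUE_DEFAULT removes the per-object override
#   _GetUpdatedParams({"memory": 128}, {"memory": None}, use_none=True)
#   -> {}              # None removes the key when use_none is enabled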
610

    
611

    
612
def _CheckOutputFields(static, dynamic, selected):
613
  """Checks whether all selected fields are valid.
614

615
  @type static: L{utils.FieldSet}
616
  @param static: static fields set
617
  @type dynamic: L{utils.FieldSet}
618
  @param dynamic: dynamic fields set
619

620
  """
621
  f = utils.FieldSet()
622
  f.Extend(static)
623
  f.Extend(dynamic)
624

    
625
  delta = f.NonMatching(selected)
626
  if delta:
627
    raise errors.OpPrereqError("Unknown output fields selected: %s"
628
                               % ",".join(delta), errors.ECODE_INVAL)
629

    
630

    
631
def _CheckGlobalHvParams(params):
632
  """Validates that given hypervisor params are not global ones.
633

634
  This will ensure that instances don't get customised versions of
635
  global params.
636

637
  """
638
  used_globals = constants.HVC_GLOBALS.intersection(params)
639
  if used_globals:
640
    msg = ("The following hypervisor parameters are global and cannot"
641
           " be customized at instance level, please modify them at"
642
           " cluster level: %s" % utils.CommaJoin(used_globals))
643
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
644

    
645

    
646
def _CheckNodeOnline(lu, node, msg=None):
647
  """Ensure that a given node is online.
648

649
  @param lu: the LU on behalf of which we make the check
650
  @param node: the node to check
651
  @param msg: if passed, should be a message to replace the default one
652
  @raise errors.OpPrereqError: if the node is offline
653

654
  """
655
  if msg is None:
656
    msg = "Can't use offline node"
657
  if lu.cfg.GetNodeInfo(node).offline:
658
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
659

    
660

    
661
def _CheckNodeNotDrained(lu, node):
662
  """Ensure that a given node is not drained.
663

664
  @param lu: the LU on behalf of which we make the check
665
  @param node: the node to check
666
  @raise errors.OpPrereqError: if the node is drained
667

668
  """
669
  if lu.cfg.GetNodeInfo(node).drained:
670
    raise errors.OpPrereqError("Can't use drained node %s" % node,
671
                               errors.ECODE_STATE)
672

    
673

    
674
def _CheckNodeVmCapable(lu, node):
675
  """Ensure that a given node is vm capable.
676

677
  @param lu: the LU on behalf of which we make the check
678
  @param node: the node to check
679
  @raise errors.OpPrereqError: if the node is not vm capable
680

681
  """
682
  if not lu.cfg.GetNodeInfo(node).vm_capable:
683
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
684
                               errors.ECODE_STATE)
685

    
686

    
687
def _CheckNodeHasOS(lu, node, os_name, force_variant):
688
  """Ensure that a node supports a given OS.
689

690
  @param lu: the LU on behalf of which we make the check
691
  @param node: the node to check
692
  @param os_name: the OS to query about
693
  @param force_variant: whether to ignore variant errors
694
  @raise errors.OpPrereqError: if the node is not supporting the OS
695

696
  """
697
  result = lu.rpc.call_os_get(node, os_name)
698
  result.Raise("OS '%s' not in supported OS list for node %s" %
699
               (os_name, node),
700
               prereq=True, ecode=errors.ECODE_INVAL)
701
  if not force_variant:
702
    _CheckOSVariant(result.payload, os_name)
703

    
704

    
705
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
706
  """Ensure that a node has the given secondary ip.
707

708
  @type lu: L{LogicalUnit}
709
  @param lu: the LU on behalf of which we make the check
710
  @type node: string
711
  @param node: the node to check
712
  @type secondary_ip: string
713
  @param secondary_ip: the ip to check
714
  @type prereq: boolean
715
  @param prereq: whether to throw a prerequisite or an execute error
716
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
717
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
718

719
  """
720
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
721
  result.Raise("Failure checking secondary ip on node %s" % node,
722
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
723
  if not result.payload:
724
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
725
           " please fix and re-run this command" % secondary_ip)
726
    if prereq:
727
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
728
    else:
729
      raise errors.OpExecError(msg)
730

    
731

    
732
def _GetClusterDomainSecret():
733
  """Reads the cluster domain secret.
734

735
  """
736
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
737
                               strict=True)
738

    
739

    
740
def _CheckInstanceDown(lu, instance, reason):
741
  """Ensure that an instance is not running."""
742
  if instance.admin_up:
743
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
744
                               (instance.name, reason), errors.ECODE_STATE)
745

    
746
  pnode = instance.primary_node
747
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
748
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
749
              prereq=True, ecode=errors.ECODE_ENVIRON)
750

    
751
  if instance.name in ins_l.payload:
752
    raise errors.OpPrereqError("Instance %s is running, %s" %
753
                               (instance.name, reason), errors.ECODE_STATE)
754

    
755

    
756
def _ExpandItemName(fn, name, kind):
757
  """Expand an item name.
758

759
  @param fn: the function to use for expansion
760
  @param name: requested item name
761
  @param kind: text description ('Node' or 'Instance')
762
  @return: the resolved (full) name
763
  @raise errors.OpPrereqError: if the item is not found
764

765
  """
766
  full_name = fn(name)
767
  if full_name is None:
768
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
769
                               errors.ECODE_NOENT)
770
  return full_name
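# Illustrative example (names are assumptions):
#   _ExpandItemName(cfg.ExpandNodeName, "node1", "Node")
# may return the fully qualified "node1.example.com"; if the expansion
# function returns None, an OpPrereqError("Node 'node1' not known") is raised.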
771

    
772

    
773
def _ExpandNodeName(cfg, name):
774
  """Wrapper over L{_ExpandItemName} for nodes."""
775
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
776

    
777

    
778
def _ExpandInstanceName(cfg, name):
779
  """Wrapper over L{_ExpandItemName} for instance."""
780
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
781

    
782

    
783
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
784
                          memory, vcpus, nics, disk_template, disks,
785
                          bep, hvp, hypervisor_name):
786
  """Builds instance related env variables for hooks
787

788
  This builds the hook environment from individual variables.
789

790
  @type name: string
791
  @param name: the name of the instance
792
  @type primary_node: string
793
  @param primary_node: the name of the instance's primary node
794
  @type secondary_nodes: list
795
  @param secondary_nodes: list of secondary nodes as strings
796
  @type os_type: string
797
  @param os_type: the name of the instance's OS
798
  @type status: boolean
799
  @param status: the should_run status of the instance
800
  @type memory: string
801
  @param memory: the memory size of the instance
802
  @type vcpus: string
803
  @param vcpus: the count of VCPUs the instance has
804
  @type nics: list
805
  @param nics: list of tuples (ip, mac, mode, link) representing
806
      the NICs the instance has
807
  @type disk_template: string
808
  @param disk_template: the disk template of the instance
809
  @type disks: list
810
  @param disks: the list of (size, mode) pairs
811
  @type bep: dict
812
  @param bep: the backend parameters for the instance
813
  @type hvp: dict
814
  @param hvp: the hypervisor parameters for the instance
815
  @type hypervisor_name: string
816
  @param hypervisor_name: the hypervisor for the instance
817
  @rtype: dict
818
  @return: the hook environment for this instance
819

820
  """
821
  if status:
822
    str_status = "up"
823
  else:
824
    str_status = "down"
825
  env = {
826
    "OP_TARGET": name,
827
    "INSTANCE_NAME": name,
828
    "INSTANCE_PRIMARY": primary_node,
829
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
830
    "INSTANCE_OS_TYPE": os_type,
831
    "INSTANCE_STATUS": str_status,
832
    "INSTANCE_MEMORY": memory,
833
    "INSTANCE_VCPUS": vcpus,
834
    "INSTANCE_DISK_TEMPLATE": disk_template,
835
    "INSTANCE_HYPERVISOR": hypervisor_name,
836
  }
837

    
838
  if nics:
839
    nic_count = len(nics)
840
    for idx, (ip, mac, mode, link) in enumerate(nics):
841
      if ip is None:
842
        ip = ""
843
      env["INSTANCE_NIC%d_IP" % idx] = ip
844
      env["INSTANCE_NIC%d_MAC" % idx] = mac
845
      env["INSTANCE_NIC%d_MODE" % idx] = mode
846
      env["INSTANCE_NIC%d_LINK" % idx] = link
847
      if mode == constants.NIC_MODE_BRIDGED:
848
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
849
  else:
850
    nic_count = 0
851

    
852
  env["INSTANCE_NIC_COUNT"] = nic_count
853

    
854
  if disks:
855
    disk_count = len(disks)
856
    for idx, (size, mode) in enumerate(disks):
857
      env["INSTANCE_DISK%d_SIZE" % idx] = size
858
      env["INSTANCE_DISK%d_MODE" % idx] = mode
859
  else:
860
    disk_count = 0
861

    
862
  env["INSTANCE_DISK_COUNT"] = disk_count
863

    
864
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
865
    for key, value in source.items():
866
      env["INSTANCE_%s_%s" % (kind, key)] = value
867

    
868
  return env
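# Illustrative result (values are assumptions) for an instance with one
# bridged NIC and one disk; this is a sketch of the env dict built above:
#   OP_TARGET/INSTANCE_NAME = "inst1.example.com"
#   INSTANCE_PRIMARY = "node1.example.com", INSTANCE_SECONDARIES = ""
#   INSTANCE_OS_TYPE = "debootstrap", INSTANCE_STATUS = "up"
#   INSTANCE_MEMORY = 128, INSTANCE_VCPUS = 1
#   INSTANCE_DISK_TEMPLATE = "plain", INSTANCE_HYPERVISOR = "xen-pvm"
#   INSTANCE_NIC_COUNT = 1, INSTANCE_NIC0_MAC/MODE/LINK/BRIDGE as per the NIC
#   INSTANCE_DISK_COUNT = 1, INSTANCE_DISK0_SIZE = 1024, INSTANCE_DISK0_MODE = "rw"
# plus one INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor parameter.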
869

    
870

    
871
def _NICListToTuple(lu, nics):
872
  """Build a list of nic information tuples.
873

874
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
875
  value in LUQueryInstanceData.
876

877
  @type lu:  L{LogicalUnit}
878
  @param lu: the logical unit on whose behalf we execute
879
  @type nics: list of L{objects.NIC}
880
  @param nics: list of nics to convert to hooks tuples
881

882
  """
883
  hooks_nics = []
884
  cluster = lu.cfg.GetClusterInfo()
885
  for nic in nics:
886
    ip = nic.ip
887
    mac = nic.mac
888
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
889
    mode = filled_params[constants.NIC_MODE]
890
    link = filled_params[constants.NIC_LINK]
891
    hooks_nics.append((ip, mac, mode, link))
892
  return hooks_nics
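# Illustrative return value (addresses are assumptions): for one bridged NIC
#   [("192.0.2.10", "aa:00:00:00:00:01", constants.NIC_MODE_BRIDGED, "xen-br0")]
# i.e. one (ip, mac, mode, link) tuple per NIC, with mode and link taken from
# the cluster-filled nicparams.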
893

    
894

    
895
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
896
  """Builds instance related env variables for hooks from an object.
897

898
  @type lu: L{LogicalUnit}
899
  @param lu: the logical unit on whose behalf we execute
900
  @type instance: L{objects.Instance}
901
  @param instance: the instance for which we should build the
902
      environment
903
  @type override: dict
904
  @param override: dictionary with key/values that will override
905
      our values
906
  @rtype: dict
907
  @return: the hook environment dictionary
908

909
  """
910
  cluster = lu.cfg.GetClusterInfo()
911
  bep = cluster.FillBE(instance)
912
  hvp = cluster.FillHV(instance)
913
  args = {
914
    'name': instance.name,
915
    'primary_node': instance.primary_node,
916
    'secondary_nodes': instance.secondary_nodes,
917
    'os_type': instance.os,
918
    'status': instance.admin_up,
919
    'memory': bep[constants.BE_MEMORY],
920
    'vcpus': bep[constants.BE_VCPUS],
921
    'nics': _NICListToTuple(lu, instance.nics),
922
    'disk_template': instance.disk_template,
923
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
924
    'bep': bep,
925
    'hvp': hvp,
926
    'hypervisor_name': instance.hypervisor,
927
  }
928
  if override:
929
    args.update(override)
930
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
931

    
932

    
933
def _AdjustCandidatePool(lu, exceptions):
934
  """Adjust the candidate pool after node operations.
935

936
  """
937
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
938
  if mod_list:
939
    lu.LogInfo("Promoted nodes to master candidate role: %s",
940
               utils.CommaJoin(node.name for node in mod_list))
941
    for name in mod_list:
942
      lu.context.ReaddNode(name)
943
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
944
  if mc_now > mc_max:
945
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
946
               (mc_now, mc_max))
947

    
948

    
949
def _DecideSelfPromotion(lu, exceptions=None):
950
  """Decide whether I should promote myself as a master candidate.
951

952
  """
953
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
954
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
955
  # the new node will increase mc_max with one, so:
956
  mc_should = min(mc_should + 1, cp_size)
957
  return mc_now < mc_should
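# Worked example (numbers are assumptions): with candidate_pool_size = 10 and
# GetMasterCandidateStats() reporting mc_now = 3, mc_should = 3, the function
# computes min(3 + 1, 10) = 4 and returns 3 < 4 = True, i.e. the newly added
# node should promote itself to master candidate.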
958

    
959

    
960
def _CheckNicsBridgesExist(lu, target_nics, target_node):
961
  """Check that the brigdes needed by a list of nics exist.
962

963
  """
964
  cluster = lu.cfg.GetClusterInfo()
965
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
966
  brlist = [params[constants.NIC_LINK] for params in paramslist
967
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
968
  if brlist:
969
    result = lu.rpc.call_bridges_exist(target_node, brlist)
970
    result.Raise("Error checking bridges on destination node '%s'" %
971
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
972

    
973

    
974
def _CheckInstanceBridgesExist(lu, instance, node=None):
975
  """Check that the brigdes needed by an instance exist.
976

977
  """
978
  if node is None:
979
    node = instance.primary_node
980
  _CheckNicsBridgesExist(lu, instance.nics, node)
981

    
982

    
983
def _CheckOSVariant(os_obj, name):
984
  """Check whether an OS name conforms to the os variants specification.
985

986
  @type os_obj: L{objects.OS}
987
  @param os_obj: OS object to check
988
  @type name: string
989
  @param name: OS name passed by the user, to check for validity
990

991
  """
992
  if not os_obj.supported_variants:
993
    return
994
  variant = objects.OS.GetVariant(name)
995
  if not variant:
996
    raise errors.OpPrereqError("OS name must include a variant",
997
                               errors.ECODE_INVAL)
998

    
999
  if variant not in os_obj.supported_variants:
1000
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001

    
1002

    
1003
def _GetNodeInstancesInner(cfg, fn):
1004
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005

    
1006

    
1007
def _GetNodeInstances(cfg, node_name):
1008
  """Returns a list of all primary and secondary instances on a node.
1009

1010
  """
1011

    
1012
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1013

    
1014

    
1015
def _GetNodePrimaryInstances(cfg, node_name):
1016
  """Returns primary instances on a node.
1017

1018
  """
1019
  return _GetNodeInstancesInner(cfg,
1020
                                lambda inst: node_name == inst.primary_node)
1021

    
1022

    
1023
def _GetNodeSecondaryInstances(cfg, node_name):
1024
  """Returns secondary instances on a node.
1025

1026
  """
1027
  return _GetNodeInstancesInner(cfg,
1028
                                lambda inst: node_name in inst.secondary_nodes)
1029

    
1030

    
1031
def _GetStorageTypeArgs(cfg, storage_type):
1032
  """Returns the arguments for a storage type.
1033

1034
  """
1035
  # Special case for file storage
1036
  if storage_type == constants.ST_FILE:
1037
    # storage.FileStorage wants a list of storage directories
1038
    return [[cfg.GetFileStorageDir()]]
1039

    
1040
  return []
1041

    
1042

    
1043
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1044
  faulty = []
1045

    
1046
  for dev in instance.disks:
1047
    cfg.SetDiskID(dev, node_name)
1048

    
1049
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1050
  result.Raise("Failed to get disk status from node %s" % node_name,
1051
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1052

    
1053
  for idx, bdev_status in enumerate(result.payload):
1054
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1055
      faulty.append(idx)
1056

    
1057
  return faulty
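# Illustrative example: if the node reports status for three disks and only
# the second one has ldisk_status == constants.LDS_FAULTY, the function
# returns [1], i.e. the indices of faulty disks within instance.disks.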
1058

    
1059

    
1060
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061
  """Check the sanity of iallocator and node arguments and use the
1062
  cluster-wide iallocator if appropriate.
1063

1064
  Check that at most one of (iallocator, node) is specified. If none is
1065
  specified, then the LU's opcode's iallocator slot is filled with the
1066
  cluster-wide default iallocator.
1067

1068
  @type iallocator_slot: string
1069
  @param iallocator_slot: the name of the opcode iallocator slot
1070
  @type node_slot: string
1071
  @param node_slot: the name of the opcode target node slot
1072

1073
  """
1074
  node = getattr(lu.op, node_slot, None)
1075
  iallocator = getattr(lu.op, iallocator_slot, None)
1076

    
1077
  if node is not None and iallocator is not None:
1078
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1079
                               errors.ECODE_INVAL)
1080
  elif node is None and iallocator is None:
1081
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1082
    if default_iallocator:
1083
      setattr(lu.op, iallocator_slot, default_iallocator)
1084
    else:
1085
      raise errors.OpPrereqError("No iallocator or node given and no"
1086
                                 " cluster-wide default iallocator found."
1087
                                 " Please specify either an iallocator or a"
1088
                                 " node, or set a cluster-wide default"
1089
                                 " iallocator.")
1090

    
1091

    
1092
class LUPostInitCluster(LogicalUnit):
1093
  """Logical unit for running hooks after cluster initialization.
1094

1095
  """
1096
  HPATH = "cluster-init"
1097
  HTYPE = constants.HTYPE_CLUSTER
1098

    
1099
  def BuildHooksEnv(self):
1100
    """Build hooks env.
1101

1102
    """
1103
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1104
    mn = self.cfg.GetMasterNode()
1105
    return env, [], [mn]
1106

    
1107
  def Exec(self, feedback_fn):
1108
    """Nothing to do.
1109

1110
    """
1111
    return True
1112

    
1113

    
1114
class LUDestroyCluster(LogicalUnit):
1115
  """Logical unit for destroying the cluster.
1116

1117
  """
1118
  HPATH = "cluster-destroy"
1119
  HTYPE = constants.HTYPE_CLUSTER
1120

    
1121
  def BuildHooksEnv(self):
1122
    """Build hooks env.
1123

1124
    """
1125
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1126
    return env, [], []
1127

    
1128
  def CheckPrereq(self):
1129
    """Check prerequisites.
1130

1131
    This checks whether the cluster is empty.
1132

1133
    Any errors are signaled by raising errors.OpPrereqError.
1134

1135
    """
1136
    master = self.cfg.GetMasterNode()
1137

    
1138
    nodelist = self.cfg.GetNodeList()
1139
    if len(nodelist) != 1 or nodelist[0] != master:
1140
      raise errors.OpPrereqError("There are still %d node(s) in"
1141
                                 " this cluster." % (len(nodelist) - 1),
1142
                                 errors.ECODE_INVAL)
1143
    instancelist = self.cfg.GetInstanceList()
1144
    if instancelist:
1145
      raise errors.OpPrereqError("There are still %d instance(s) in"
1146
                                 " this cluster." % len(instancelist),
1147
                                 errors.ECODE_INVAL)
1148

    
1149
  def Exec(self, feedback_fn):
1150
    """Destroys the cluster.
1151

1152
    """
1153
    master = self.cfg.GetMasterNode()
1154

    
1155
    # Run post hooks on master node before it's removed
1156
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1157
    try:
1158
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1159
    except:
1160
      # pylint: disable-msg=W0702
1161
      self.LogWarning("Errors occurred running hooks on %s" % master)
1162

    
1163
    result = self.rpc.call_node_stop_master(master, False)
1164
    result.Raise("Could not disable the master role")
1165

    
1166
    return master
1167

    
1168

    
1169
def _VerifyCertificate(filename):
1170
  """Verifies a certificate for LUVerifyCluster.
1171

1172
  @type filename: string
1173
  @param filename: Path to PEM file
1174

1175
  """
1176
  try:
1177
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178
                                           utils.ReadFile(filename))
1179
  except Exception, err: # pylint: disable-msg=W0703
1180
    return (LUVerifyCluster.ETYPE_ERROR,
1181
            "Failed to load X509 certificate %s: %s" % (filename, err))
1182

    
1183
  (errcode, msg) = \
1184
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185
                                constants.SSL_CERT_EXPIRATION_ERROR)
1186

    
1187
  if msg:
1188
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1189
  else:
1190
    fnamemsg = None
1191

    
1192
  if errcode is None:
1193
    return (None, fnamemsg)
1194
  elif errcode == utils.CERT_WARNING:
1195
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1196
  elif errcode == utils.CERT_ERROR:
1197
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1198

    
1199
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
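# Illustrative return values (the path is an assumption): a certificate
# expiring within SSL_CERT_EXPIRATION_WARN days yields
#   (LUVerifyCluster.ETYPE_WARNING, "While verifying /path/to/cert.pem: ..."),
# an unreadable/invalid file yields (LUVerifyCluster.ETYPE_ERROR, ...), and a
# healthy certificate yields (None, None).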
1200

    
1201

    
1202
class LUVerifyCluster(LogicalUnit):
1203
  """Verifies the cluster status.
1204

1205
  """
1206
  HPATH = "cluster-verify"
1207
  HTYPE = constants.HTYPE_CLUSTER
1208
  REQ_BGL = False
1209

    
1210
  TCLUSTER = "cluster"
1211
  TNODE = "node"
1212
  TINSTANCE = "instance"
1213

    
1214
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1215
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1216
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1217
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1218
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1219
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1220
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1221
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1222
  ENODEDRBD = (TNODE, "ENODEDRBD")
1223
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1224
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1225
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1226
  ENODEHV = (TNODE, "ENODEHV")
1227
  ENODELVM = (TNODE, "ENODELVM")
1228
  ENODEN1 = (TNODE, "ENODEN1")
1229
  ENODENET = (TNODE, "ENODENET")
1230
  ENODEOS = (TNODE, "ENODEOS")
1231
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1232
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1233
  ENODERPC = (TNODE, "ENODERPC")
1234
  ENODESSH = (TNODE, "ENODESSH")
1235
  ENODEVERSION = (TNODE, "ENODEVERSION")
1236
  ENODESETUP = (TNODE, "ENODESETUP")
1237
  ENODETIME = (TNODE, "ENODETIME")
1238
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1239

    
1240
  ETYPE_FIELD = "code"
1241
  ETYPE_ERROR = "ERROR"
1242
  ETYPE_WARNING = "WARNING"
1243

    
1244
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1245

    
1246
  class NodeImage(object):
1247
    """A class representing the logical and physical status of a node.
1248

1249
    @type name: string
1250
    @ivar name: the node name to which this object refers
1251
    @ivar volumes: a structure as returned from
1252
        L{ganeti.backend.GetVolumeList} (runtime)
1253
    @ivar instances: a list of running instances (runtime)
1254
    @ivar pinst: list of configured primary instances (config)
1255
    @ivar sinst: list of configured secondary instances (config)
1256
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1257
        of this node (config)
1258
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1259
    @ivar dfree: free disk, as reported by the node (runtime)
1260
    @ivar offline: the offline status (config)
1261
    @type rpc_fail: boolean
1262
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1263
        not whether the individual keys were correct) (runtime)
1264
    @type lvm_fail: boolean
1265
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1266
    @type hyp_fail: boolean
1267
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1268
    @type ghost: boolean
1269
    @ivar ghost: whether this is a known node or not (config)
1270
    @type os_fail: boolean
1271
    @ivar os_fail: whether the RPC call didn't return valid OS data
1272
    @type oslist: list
1273
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1274
    @type vm_capable: boolean
1275
    @ivar vm_capable: whether the node can host instances
1276

1277
    """
1278
    def __init__(self, offline=False, name=None, vm_capable=True):
1279
      self.name = name
1280
      self.volumes = {}
1281
      self.instances = []
1282
      self.pinst = []
1283
      self.sinst = []
1284
      self.sbp = {}
1285
      self.mfree = 0
1286
      self.dfree = 0
1287
      self.offline = offline
1288
      self.vm_capable = vm_capable
1289
      self.rpc_fail = False
1290
      self.lvm_fail = False
1291
      self.hyp_fail = False
1292
      self.ghost = False
1293
      self.os_fail = False
1294
      self.oslist = {}
1295

    
1296
  def ExpandNames(self):
1297
    self.needed_locks = {
1298
      locking.LEVEL_NODE: locking.ALL_SET,
1299
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1300
    }
1301
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1302

    
1303
  def _Error(self, ecode, item, msg, *args, **kwargs):
1304
    """Format an error message.
1305

1306
    Based on the opcode's error_codes parameter, either format a
1307
    parseable error code, or a simpler error string.
1308

1309
    This must be called only from Exec and functions called from Exec.
1310

1311
    """
1312
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1313
    itype, etxt = ecode
1314
    # first complete the msg
1315
    if args:
1316
      msg = msg % args
1317
    # then format the whole message
1318
    if self.op.error_codes:
1319
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1320
    else:
1321
      if item:
1322
        item = " " + item
1323
      else:
1324
        item = ""
1325
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1326
    # and finally report it via the feedback_fn
1327
    self._feedback_fn("  - %s" % msg)
1328

    
1329
  def _ErrorIf(self, cond, *args, **kwargs):
1330
    """Log an error message if the passed condition is True.
1331

1332
    """
1333
    cond = bool(cond) or self.op.debug_simulate_errors
1334
    if cond:
1335
      self._Error(*args, **kwargs)
1336
    # do not mark the operation as failed for WARN cases only
1337
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1338
      self.bad = self.bad or cond
1339

    
1340
  def _VerifyNode(self, ninfo, nresult):
1341
    """Perform some basic validation on data returned from a node.
1342

1343
      - check the result data structure is well formed and has all the
1344
        mandatory fields
1345
      - check ganeti version
1346

1347
    @type ninfo: L{objects.Node}
1348
    @param ninfo: the node to check
1349
    @param nresult: the results from the node
1350
    @rtype: boolean
1351
    @return: whether overall this call was successful (and we can expect
1352
         reasonable values in the response)
1353

1354
    """
1355
    node = ninfo.name
1356
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1357

    
1358
    # main result, nresult should be a non-empty dict
1359
    test = not nresult or not isinstance(nresult, dict)
1360
    _ErrorIf(test, self.ENODERPC, node,
1361
                  "unable to verify node: no data returned")
1362
    if test:
1363
      return False
1364

    
1365
    # compares ganeti version
1366
    local_version = constants.PROTOCOL_VERSION
1367
    remote_version = nresult.get("version", None)
1368
    test = not (remote_version and
1369
                isinstance(remote_version, (list, tuple)) and
1370
                len(remote_version) == 2)
1371
    _ErrorIf(test, self.ENODERPC, node,
1372
             "connection to node returned invalid data")
1373
    if test:
1374
      return False
1375

    
1376
    test = local_version != remote_version[0]
1377
    _ErrorIf(test, self.ENODEVERSION, node,
1378
             "incompatible protocol versions: master %s,"
1379
             " node %s", local_version, remote_version[0])
1380
    if test:
1381
      return False
1382

    
1383
    # node seems compatible, we can actually try to look into its results
1384

    
1385
    # full package version
1386
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1387
                  self.ENODEVERSION, node,
1388
                  "software version mismatch: master %s, node %s",
1389
                  constants.RELEASE_VERSION, remote_version[1],
1390
                  code=self.ETYPE_WARNING)
1391

    
1392
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1393
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1394
      for hv_name, hv_result in hyp_result.iteritems():
1395
        test = hv_result is not None
1396
        _ErrorIf(test, self.ENODEHV, node,
1397
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1398

    
1399
    test = nresult.get(constants.NV_NODESETUP,
1400
                           ["Missing NODESETUP results"])
1401
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1402
             "; ".join(test))
1403

    
1404
    return True
1405

    
1406
  def _VerifyNodeTime(self, ninfo, nresult,
1407
                      nvinfo_starttime, nvinfo_endtime):
1408
    """Check the node time.
1409

1410
    @type ninfo: L{objects.Node}
1411
    @param ninfo: the node to check
1412
    @param nresult: the remote results for the node
1413
    @param nvinfo_starttime: the start time of the RPC call
1414
    @param nvinfo_endtime: the end time of the RPC call
1415

1416
    """
1417
    node = ninfo.name
1418
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1419

    
1420
    ntime = nresult.get(constants.NV_TIME, None)
1421
    try:
1422
      ntime_merged = utils.MergeTime(ntime)
1423
    except (ValueError, TypeError):
1424
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1425
      return
1426

    
1427
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1428
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1429
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1430
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1431
    else:
1432
      ntime_diff = None
1433

    
1434
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1435
             "Node time diverges by at least %s from master node time",
1436
             ntime_diff)
1437

    
1438
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1439
    """Check the node time.
1440

1441
    @type ninfo: L{objects.Node}
1442
    @param ninfo: the node to check
1443
    @param nresult: the remote results for the node
1444
    @param vg_name: the configured VG name
1445

1446
    """
1447
    if vg_name is None:
1448
      return
1449

    
1450
    node = ninfo.name
1451
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1452

    
1453
    # checks vg existence and size > 20G
1454
    vglist = nresult.get(constants.NV_VGLIST, None)
1455
    test = not vglist
1456
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1457
    if not test:
1458
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1459
                                            constants.MIN_VG_SIZE)
1460
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1461

    
1462
    # check pv names
1463
    pvlist = nresult.get(constants.NV_PVLIST, None)
1464
    test = pvlist is None
1465
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1466
    if not test:
1467
      # check that ':' is not present in PV names, since it's a
1468
      # special character for lvcreate (denotes the range of PEs to
1469
      # use on the PV)
1470
      for _, pvname, owner_vg in pvlist:
1471
        test = ":" in pvname
1472
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1473
                 " '%s' of VG '%s'", pvname, owner_vg)
1474

    
1475
  def _VerifyNodeNetwork(self, ninfo, nresult):
1476
    """Check the node time.
1477

1478
    @type ninfo: L{objects.Node}
1479
    @param ninfo: the node to check
1480
    @param nresult: the remote results for the node
1481

1482
    """
1483
    node = ninfo.name
1484
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1485

    
1486
    test = constants.NV_NODELIST not in nresult
1487
    _ErrorIf(test, self.ENODESSH, node,
1488
             "node hasn't returned node ssh connectivity data")
1489
    if not test:
1490
      if nresult[constants.NV_NODELIST]:
1491
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1492
          _ErrorIf(True, self.ENODESSH, node,
1493
                   "ssh communication with node '%s': %s", a_node, a_msg)
1494

    
1495
    test = constants.NV_NODENETTEST not in nresult
1496
    _ErrorIf(test, self.ENODENET, node,
1497
             "node hasn't returned node tcp connectivity data")
1498
    if not test:
1499
      if nresult[constants.NV_NODENETTEST]:
1500
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1501
        for anode in nlist:
1502
          _ErrorIf(True, self.ENODENET, node,
1503
                   "tcp communication with node '%s': %s",
1504
                   anode, nresult[constants.NV_NODENETTEST][anode])
1505

    
1506
    test = constants.NV_MASTERIP not in nresult
1507
    _ErrorIf(test, self.ENODENET, node,
1508
             "node hasn't returned node master IP reachability data")
1509
    if not test:
1510
      if not nresult[constants.NV_MASTERIP]:
1511
        if node == self.master_node:
1512
          msg = "the master node cannot reach the master IP (not configured?)"
1513
        else:
1514
          msg = "cannot reach the master IP"
1515
        _ErrorIf(True, self.ENODENET, node, msg)
1516

    
1517
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1518
                      diskstatus):
1519
    """Verify an instance.
1520

1521
    This function checks to see if the required block devices are
1522
    available on the instance's node.
1523

1524
    """
1525
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1526
    node_current = instanceconfig.primary_node
1527

    
1528
    node_vol_should = {}
1529
    instanceconfig.MapLVsByNode(node_vol_should)
1530

    
1531
    for node in node_vol_should:
1532
      n_img = node_image[node]
1533
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1534
        # ignore missing volumes on offline or broken nodes
1535
        continue
1536
      for volume in node_vol_should[node]:
1537
        test = volume not in n_img.volumes
1538
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1539
                 "volume %s missing on node %s", volume, node)
1540

    
1541
    if instanceconfig.admin_up:
1542
      pri_img = node_image[node_current]
1543
      test = instance not in pri_img.instances and not pri_img.offline
1544
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1545
               "instance not running on its primary node %s",
1546
               node_current)
1547

    
1548
    for node, n_img in node_image.items():
1549
      if (not node == node_current):
1550
        test = instance in n_img.instances
1551
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1552
                 "instance should not run on node %s", node)
1553

    
1554
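    # diskstatus maps each node name to a list of (success, status) pairs,
    # one per disk index (see _CollectDiskInfo); flatten it into
    # (node, success, status, disk_index) tuples for the checks below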
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
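      # n_img.sbp maps a primary node name to the instances that have that
      # node as primary and the current node as one of their secondaries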
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

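    # For each required file we distinguish three cases: missing on the node
    # (test1), present with a checksum different from the local copy (test2),
    # and present with a matching checksum (test3); whether each case is an
    # error depends on whether the node must have the file at all (must_have)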
    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

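    # node_drbd maps each DRBD minor expected on this node to a tuple of
    # (instance name, must_exist); for ghost instances must_exist is False,
    # so they only trigger the "unallocated minor" check below, not the
    # "minor not active" one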
    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

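    # nimg.oslist maps each OS name to a list of
    # (path, status, diagnose, variants, parameters, api_versions) tuples,
    # one entry per directory in which the OS was found on the node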
    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

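    # nimg.mfree (free memory as reported by the hypervisor) and nimg.dfree
    # (the figure reported for the configured volume group) are filled in
    # below; mfree is what the N+1 check in _VerifyNPlusOneMemory relies on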
    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1935
    """Gets per-disk status information for all instances.
1936

1937
    @type nodelist: list of strings
1938
    @param nodelist: Node names
1939
    @type node_image: dict of (name, L{objects.Node})
1940
    @param node_image: Node objects
1941
    @type instanceinfo: dict of (name, L{objects.Instance})
1942
    @param instanceinfo: Instance objects
1943
    @rtype: {instance: {node: [(success, payload)]}}
1944
    @return: a dictionary of per-instance dictionaries with nodes as
1945
        keys and disk information as values; the disk information is a
1946
        list of tuples (success, payload)
1947

1948
    """
1949
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1950

    
1951
    node_disks = {}
1952
    node_disks_devonly = {}
1953
    diskless_instances = set()
1954
    diskless = constants.DT_DISKLESS
1955

    
1956
    for nname in nodelist:
1957
      node_instances = list(itertools.chain(node_image[nname].pinst,
1958
                                            node_image[nname].sinst))
1959
      diskless_instances.update(inst for inst in node_instances
1960
                                if instanceinfo[inst].disk_template == diskless)
1961
      disks = [(inst, disk)
1962
               for inst in node_instances
1963
               for disk in instanceinfo[inst].disks]
1964

    
1965
      if not disks:
1966
        # No need to collect data
1967
        continue
1968

    
1969
      node_disks[nname] = disks
1970

    
1971
      # Creating copies as SetDiskID below will modify the objects and that can
1972
      # lead to incorrect data returned from nodes
1973
      devonly = [dev.Copy() for (_, dev) in disks]
1974

    
1975
      for dev in devonly:
1976
        self.cfg.SetDiskID(dev, nname)
1977

    
1978
      node_disks_devonly[nname] = devonly
1979

    
1980
    assert len(node_disks) == len(node_disks_devonly)
1981

    
1982
    # Collect data from all nodes with disks
1983
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1984
                                                          node_disks_devonly)
1985

    
1986
    assert len(result) == len(node_disks)
1987

    
1988
    instdisk = {}
1989

    
1990
    for (nname, nres) in result.items():
1991
      disks = node_disks[nname]
1992

    
1993
      if nres.offline:
1994
        # No data from this node
1995
        data = len(disks) * [(False, "node offline")]
1996
      else:
1997
        msg = nres.fail_msg
1998
        _ErrorIf(msg, self.ENODERPC, nname,
1999
                 "while getting disk information: %s", msg)
2000
        if msg:
2001
          # No data from this node
2002
          data = len(disks) * [(False, msg)]
2003
        else:
2004
          data = []
2005
          for idx, i in enumerate(nres.payload):
2006
            if isinstance(i, (tuple, list)) and len(i) == 2:
2007
              data.append(i)
2008
            else:
2009
              logging.warning("Invalid result from node %s, entry %d: %s",
2010
                              nname, idx, i)
2011
              data.append((False, "Invalid result from the remote node"))
2012

    
2013
      for ((inst, _), status) in zip(disks, data):
2014
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2015

    
2016
    # Add empty entries for diskless instances.
2017
    for inst in diskless_instances:
2018
      assert inst not in instdisk
2019
      instdisk[inst] = {}
2020

    
2021
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2022
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2023
                      compat.all(isinstance(s, (tuple, list)) and
2024
                                 len(s) == 2 for s in statuses)
2025
                      for inst, nnames in instdisk.items()
2026
                      for nname, statuses in nnames.items())
2027
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2028

    
2029
    return instdisk
2030

    
2031
  def BuildHooksEnv(self):
2032
    """Build hooks env.
2033

2034
    Cluster-Verify hooks just run in the post phase and their failure makes
2035
    the output be logged in the verify output and the verification to fail.
2036

2037
    """
2038
    all_nodes = self.cfg.GetNodeList()
2039
    env = {
2040
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2041
      }
2042
    for node in self.cfg.GetAllNodesInfo().values():
2043
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2044

    
2045
    return env, [], all_nodes
2046

    
2047
  def Exec(self, feedback_fn):
2048
    """Verify integrity of cluster, performing various test on nodes.
2049

2050
    """
2051
    self.bad = False
2052
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2053
    verbose = self.op.verbose
2054
    self._feedback_fn = feedback_fn
2055
    feedback_fn("* Verifying global settings")
2056
    for msg in self.cfg.VerifyConfig():
2057
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2058

    
2059
    # Check the cluster certificates
2060
    for cert_filename in constants.ALL_CERT_FILES:
2061
      (errcode, msg) = _VerifyCertificate(cert_filename)
2062
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2063

    
2064
    vg_name = self.cfg.GetVGName()
2065
    drbd_helper = self.cfg.GetDRBDHelper()
2066
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2067
    cluster = self.cfg.GetClusterInfo()
2068
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2069
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2070
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2071
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2072
                        for iname in instancelist)
2073
    i_non_redundant = [] # Non redundant instances
2074
    i_non_a_balanced = [] # Non auto-balanced instances
2075
    n_offline = 0 # Count of offline nodes
2076
    n_drained = 0 # Count of nodes being drained
2077
    node_vol_should = {}
2078

    
2079
    # FIXME: verify OS list
2080
    # do local checksums
2081
    master_files = [constants.CLUSTER_CONF_FILE]
2082
    master_node = self.master_node = self.cfg.GetMasterNode()
2083
    master_ip = self.cfg.GetMasterIP()
2084

    
2085
    file_names = ssconf.SimpleStore().GetFileList()
2086
    file_names.extend(constants.ALL_CERT_FILES)
2087
    file_names.extend(master_files)
2088
    if cluster.modify_etc_hosts:
2089
      file_names.append(constants.ETC_HOSTS)
2090

    
2091
    local_checksums = utils.FingerprintFiles(file_names)
2092

    
2093
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2094
    node_verify_param = {
2095
      constants.NV_FILELIST: file_names,
2096
      constants.NV_NODELIST: [node.name for node in nodeinfo
2097
                              if not node.offline],
2098
      constants.NV_HYPERVISOR: hypervisors,
2099
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2100
                                  node.secondary_ip) for node in nodeinfo
2101
                                 if not node.offline],
2102
      constants.NV_INSTANCELIST: hypervisors,
2103
      constants.NV_VERSION: None,
2104
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2105
      constants.NV_NODESETUP: None,
2106
      constants.NV_TIME: None,
2107
      constants.NV_MASTERIP: (master_node, master_ip),
2108
      constants.NV_OSLIST: None,
2109
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2110
      }
2111

    
2112
    if vg_name is not None:
2113
      node_verify_param[constants.NV_VGLIST] = None
2114
      node_verify_param[constants.NV_LVLIST] = vg_name
2115
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2116
      node_verify_param[constants.NV_DRBDLIST] = None
2117

    
2118
    if drbd_helper:
2119
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2120

    
2121
    # Build our expected cluster state
2122
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2123
                                                 name=node.name,
2124
                                                 vm_capable=node.vm_capable))
2125
                      for node in nodeinfo)
2126

    
2127
    # Gather OOB paths
2128
    oob_paths = []
2129
    for node in nodeinfo:
2130
      path = _SupportsOob(self.cfg, node)
2131
      if path and path not in oob_paths:
2132
        oob_paths.append(path)
2133

    
2134
    if oob_paths:
2135
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2136

    
2137
    for instance in instancelist:
2138
      inst_config = instanceinfo[instance]
2139

    
2140
      for nname in inst_config.all_nodes:
2141
        if nname not in node_image:
2142
          # ghost node
2143
          gnode = self.NodeImage(name=nname)
2144
          gnode.ghost = True
2145
          node_image[nname] = gnode
2146

    
2147
      inst_config.MapLVsByNode(node_vol_should)
2148

    
2149
      pnode = inst_config.primary_node
2150
      node_image[pnode].pinst.append(instance)
2151

    
2152
      for snode in inst_config.secondary_nodes:
2153
        nimg = node_image[snode]
2154
        nimg.sinst.append(instance)
2155
        if pnode not in nimg.sbp:
2156
          nimg.sbp[pnode] = []
2157
        nimg.sbp[pnode].append(instance)
2158

    
2159
    # At this point, we have the in-memory data structures complete,
2160
    # except for the runtime information, which we'll gather next
2161

    
2162
    # Due to the way our RPC system works, exact response times cannot be
2163
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2164
    # time before and after executing the request, we can at least have a time
2165
    # window.
2166
    nvinfo_starttime = time.time()
2167
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2168
                                           self.cfg.GetClusterName())
2169
    nvinfo_endtime = time.time()
2170

    
2171
    all_drbd_map = self.cfg.ComputeDRBDMap()
2172

    
2173
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2174
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2175

    
2176
    feedback_fn("* Verifying node status")
2177

    
2178
    refos_img = None
2179

    
2180
    for node_i in nodeinfo:
2181
      node = node_i.name
2182
      nimg = node_image[node]
2183

    
2184
      if node_i.offline:
2185
        if verbose:
2186
          feedback_fn("* Skipping offline node %s" % (node,))
2187
        n_offline += 1
2188
        continue
2189

    
2190
      if node == master_node:
2191
        ntype = "master"
2192
      elif node_i.master_candidate:
2193
        ntype = "master candidate"
2194
      elif node_i.drained:
2195
        ntype = "drained"
2196
        n_drained += 1
2197
      else:
2198
        ntype = "regular"
2199
      if verbose:
2200
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2201

    
2202
      msg = all_nvinfo[node].fail_msg
2203
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2204
      if msg:
2205
        nimg.rpc_fail = True
2206
        continue
2207

    
2208
      nresult = all_nvinfo[node].payload
2209

    
2210
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2211
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2212
      self._VerifyNodeNetwork(node_i, nresult)
2213
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2214
                            master_files)
2215

    
2216
      self._VerifyOob(node_i, nresult)
2217

    
2218
      if nimg.vm_capable:
2219
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2220
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2221
                             all_drbd_map)
2222

    
2223
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2224
        self._UpdateNodeInstances(node_i, nresult, nimg)
2225
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2226
        self._UpdateNodeOS(node_i, nresult, nimg)
2227
        if not nimg.os_fail:
2228
          if refos_img is None:
2229
            refos_img = nimg
2230
          self._VerifyNodeOS(node_i, nimg, refos_img)
2231

    
2232
    feedback_fn("* Verifying instance status")
2233
    for instance in instancelist:
2234
      if verbose:
2235
        feedback_fn("* Verifying instance %s" % instance)
2236
      inst_config = instanceinfo[instance]
2237
      self._VerifyInstance(instance, inst_config, node_image,
2238
                           instdisk[instance])
2239
      inst_nodes_offline = []
2240

    
2241
      pnode = inst_config.primary_node
2242
      pnode_img = node_image[pnode]
2243
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2244
               self.ENODERPC, pnode, "instance %s, connection to"
2245
               " primary node failed", instance)
2246

    
2247
      if pnode_img.offline:
2248
        inst_nodes_offline.append(pnode)
2249

    
2250
      # If the instance is non-redundant we cannot survive losing its primary
2251
      # node, so we are not N+1 compliant. On the other hand we have no disk
2252
      # templates with more than one secondary so that situation is not well
2253
      # supported either.
2254
      # FIXME: does not support file-backed instances
2255
      if not inst_config.secondary_nodes:
2256
        i_non_redundant.append(instance)
2257
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2258
               instance, "instance has multiple secondary nodes: %s",
2259
               utils.CommaJoin(inst_config.secondary_nodes),
2260
               code=self.ETYPE_WARNING)
2261

    
2262
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2263
        i_non_a_balanced.append(instance)
2264

    
2265
      for snode in inst_config.secondary_nodes:
2266
        s_img = node_image[snode]
2267
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2268
                 "instance %s, connection to secondary node failed", instance)
2269

    
2270
        if s_img.offline:
2271
          inst_nodes_offline.append(snode)
2272

    
2273
      # warn that the instance lives on offline nodes
2274
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2275
               "instance lives on offline node(s) %s",
2276
               utils.CommaJoin(inst_nodes_offline))
2277
      # ... or ghost/non-vm_capable nodes
2278
      for node in inst_config.all_nodes:
2279
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2280
                 "instance lives on ghost node %s", node)
2281
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2282
                 instance, "instance lives on non-vm_capable node %s", node)
2283

    
2284
    feedback_fn("* Verifying orphan volumes")
2285
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2286
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2287

    
2288
    feedback_fn("* Verifying orphan instances")
2289
    self._VerifyOrphanInstances(instancelist, node_image)
2290

    
2291
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2292
      feedback_fn("* Verifying N+1 Memory redundancy")
2293
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2294

    
2295
    feedback_fn("* Other Notes")
2296
    if i_non_redundant:
2297
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2298
                  % len(i_non_redundant))
2299

    
2300
    if i_non_a_balanced:
2301
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2302
                  % len(i_non_a_balanced))
2303

    
2304
    if n_offline:
2305
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2306

    
2307
    if n_drained:
2308
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2309

    
2310
    return not self.bad
2311

    
2312
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2313
    """Analyze the post-hooks' result
2314

2315
    This method analyses the hook result, handles it, and sends some
2316
    nicely-formatted feedback back to the user.
2317

2318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2320
    @param hooks_results: the results of the multi-node hooks rpc call
2321
    @param feedback_fn: function used to send feedback back to the caller
2322
    @param lu_result: previous Exec result
2323
    @return: the new Exec result, based on the previous result
2324
        and hook results
2325

2326
    """
2327
    # We only really run POST phase hooks, and are only interested in
2328
    # their results
2329
    if phase == constants.HOOKS_PHASE_POST:
2330
      # Used to change hooks' output to proper indentation
2331
      feedback_fn("* Hooks Results")
2332
      assert hooks_results, "invalid result from hooks"
2333

    
2334
      for node_name in hooks_results:
2335
        res = hooks_results[node_name]
2336
        msg = res.fail_msg
2337
        test = msg and not res.offline
2338
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2339
                      "Communication failure in hooks execution: %s", msg)
2340
        if res.offline or msg:
2341
          # No need to investigate payload if node is offline or gave an error.
2342
          # override manually lu_result here as _ErrorIf only
2343
          # overrides self.bad
2344
          lu_result = 1
2345
          continue
2346
        for script, hkr, output in res.payload:
2347
          test = hkr == constants.HKR_FAIL
2348
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2349
                        "Script %s failed, output:", script)
2350
          if test:
2351
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2352
            feedback_fn("%s" % output)
2353
            lu_result = 0
2354

    
2355
      return lu_result
2356

    
2357

    
2358
class LUVerifyDisks(NoHooksLU):
2359
  """Verifies the cluster disks status.
2360

2361
  """
2362
  REQ_BGL = False
2363

    
2364
  def ExpandNames(self):
2365
    self.needed_locks = {
2366
      locking.LEVEL_NODE: locking.ALL_SET,
2367
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2368
    }
2369
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2370

    
2371
  def Exec(self, feedback_fn):
2372
    """Verify integrity of cluster disks.
2373

2374
    @rtype: tuple of three items
2375
    @return: a tuple of (dict of node-to-node_error, list of instances
2376
        which need activate-disks, dict of instance: (node, volume) for
2377
        missing volumes)
2378

2379
    """
2380
    result = res_nodes, res_instances, res_missing = {}, [], {}
2381

    
2382
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2383
    instances = [self.cfg.GetInstanceInfo(name)
2384
                 for name in self.cfg.GetInstanceList()]
2385

    
2386
    nv_dict = {}
2387
    for inst in instances:
2388
      inst_lvs = {}
2389
      if (not inst.admin_up or
2390
          inst.disk_template not in constants.DTS_NET_MIRROR):
2391
        continue
2392
      inst.MapLVsByNode(inst_lvs)
2393
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2394
      for node, vol_list in inst_lvs.iteritems():
2395
        for vol in vol_list:
2396
          nv_dict[(node, vol)] = inst
2397

    
2398
    if not nv_dict:
2399
      return result
2400

    
2401
    vg_names = self.rpc.call_vg_list(nodes)
2402
    vg_names.Raise("Cannot get list of VGs")
2403

    
2404
    for node in nodes:
2405
      # node_volume
2406
      node_res = self.rpc.call_lv_list([node],
2407
                                       vg_names[node].payload.keys())[node]
2408
      if node_res.offline:
2409
        continue
2410
      msg = node_res.fail_msg
2411
      if msg:
2412
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2413
        res_nodes[node] = msg
2414
        continue
2415

    
2416
      lvs = node_res.payload
2417
      for lv_name, (_, _, lv_online) in lvs.items():
2418
        inst = nv_dict.pop((node, lv_name), None)
2419
        if (not lv_online and inst is not None
2420
            and inst.name not in res_instances):
2421
          res_instances.append(inst.name)
2422

    
2423
    # any leftover items in nv_dict are missing LVs, let's arrange the
2424
    # data better
2425
    for key, inst in nv_dict.iteritems():
2426
      if inst.name not in res_missing:
2427
        res_missing[inst.name] = []
2428
      res_missing[inst.name].append(key)
2429

    
2430
    return result
2431

    
2432

    
2433
class LURepairDiskSizes(NoHooksLU):
2434
  """Verifies the cluster disks sizes.
2435

2436
  """
2437
  REQ_BGL = False
2438

    
2439
  def ExpandNames(self):
2440
    if self.op.instances:
2441
      self.wanted_names = []
2442
      for name in self.op.instances:
2443
        full_name = _ExpandInstanceName(self.cfg, name)
2444
        self.wanted_names.append(full_name)
2445
      self.needed_locks = {
2446
        locking.LEVEL_NODE: [],
2447
        locking.LEVEL_INSTANCE: self.wanted_names,
2448
        }
2449
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2450
    else:
2451
      self.wanted_names = None
2452
      self.needed_locks = {
2453
        locking.LEVEL_NODE: locking.ALL_SET,
2454
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2455
        }
2456
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2457

    
2458
  def DeclareLocks(self, level):
2459
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2460
      self._LockInstancesNodes(primary_only=True)
2461

    
2462
  def CheckPrereq(self):
2463
    """Check prerequisites.
2464

2465
    This only checks the optional instance list against the existing names.
2466

2467
    """
2468
    if self.wanted_names is None:
2469
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2470

    
2471
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2472
                             in self.wanted_names]
2473

    
2474
  def _EnsureChildSizes(self, disk):
2475
    """Ensure children of the disk have the needed disk size.
2476

2477
    This is valid mainly for DRBD8 and fixes an issue where the
2478
    children have smaller disk size.
2479

2480
    @param disk: an L{ganeti.objects.Disk} object
2481

2482
    """
2483
    if disk.dev_type == constants.LD_DRBD8:
2484
      assert disk.children, "Empty children for DRBD8?"
2485
      fchild = disk.children[0]
2486
      mismatch = fchild.size < disk.size
2487
      if mismatch:
2488
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2489
                     fchild.size, disk.size)
2490
        fchild.size = disk.size
2491

    
2492
      # and we recurse on this child only, not on the metadev
2493
      return self._EnsureChildSizes(fchild) or mismatch
2494
    else:
2495
      return False
2496

    
2497
  def Exec(self, feedback_fn):
2498
    """Verify the size of cluster disks.
2499

2500
    """
2501
    # TODO: check child disks too
2502
    # TODO: check differences in size between primary/secondary nodes
2503
    per_node_disks = {}
2504
    for instance in self.wanted_instances:
2505
      pnode = instance.primary_node
2506
      if pnode not in per_node_disks:
2507
        per_node_disks[pnode] = []
2508
      for idx, disk in enumerate(instance.disks):
2509
        per_node_disks[pnode].append((instance, idx, disk))
2510

    
2511
    changed = []
2512
    for node, dskl in per_node_disks.items():
2513
      newl = [v[2].Copy() for v in dskl]
2514
      for dsk in newl:
2515
        self.cfg.SetDiskID(dsk, node)
2516
      result = self.rpc.call_blockdev_getsizes(node, newl)
2517
      if result.fail_msg:
2518
        self.LogWarning("Failure in blockdev_getsizes call to node"
2519
                        " %s, ignoring", node)
2520
        continue
2521
      if len(result.data) != len(dskl):
2522
        self.LogWarning("Invalid result from node %s, ignoring node results",
2523
                        node)
2524
        continue
2525
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2526
        if size is None:
2527
          self.LogWarning("Disk %d of instance %s did not return size"
2528
                          " information, ignoring", idx, instance.name)
2529
          continue
2530
        if not isinstance(size, (int, long)):
2531
          self.LogWarning("Disk %d of instance %s did not return valid"
2532
                          " size information, ignoring", idx, instance.name)
2533
          continue
2534
        size = size >> 20
2535
        if size != disk.size:
2536
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2537
                       " correcting: recorded %d, actual %d", idx,
2538
                       instance.name, disk.size, size)
2539
          disk.size = size
2540
          self.cfg.Update(instance, feedback_fn)
2541
          changed.append((instance.name, idx, size))
2542
        if self._EnsureChildSizes(disk):
2543
          self.cfg.Update(instance, feedback_fn)
2544
          changed.append((instance.name, idx, disk.size))
2545
    return changed
2546

    
2547

    
2548
class LURenameCluster(LogicalUnit):
2549
  """Rename the cluster.
2550

2551
  """
2552
  HPATH = "cluster-rename"
2553
  HTYPE = constants.HTYPE_CLUSTER
2554

    
2555
  def BuildHooksEnv(self):
2556
    """Build hooks env.
2557

2558
    """
2559
    env = {
2560
      "OP_TARGET": self.cfg.GetClusterName(),
2561
      "NEW_NAME": self.op.name,
2562
      }
2563
    mn = self.cfg.GetMasterNode()
2564
    all_nodes = self.cfg.GetNodeList()
2565
    return env, [mn], all_nodes
2566

    
2567
  def CheckPrereq(self):
2568
    """Verify that the passed name is a valid one.
2569

2570
    """
2571
    hostname = netutils.GetHostname(name=self.op.name,
2572
                                    family=self.cfg.GetPrimaryIPFamily())
2573

    
2574
    new_name = hostname.name
2575
    self.ip = new_ip = hostname.ip
2576
    old_name = self.cfg.GetClusterName()
2577
    old_ip = self.cfg.GetMasterIP()
2578
    if new_name == old_name and new_ip == old_ip:
2579
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2580
                                 " cluster has changed",
2581
                                 errors.ECODE_INVAL)
2582
    if new_ip != old_ip:
2583
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2584
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2585
                                   " reachable on the network" %
2586
                                   new_ip, errors.ECODE_NOTUNIQUE)
2587

    
2588
    self.op.name = new_name
2589

    
2590
  def Exec(self, feedback_fn):
2591
    """Rename the cluster.
2592

2593
    """
2594
    clustername = self.op.name
2595
    ip = self.ip
2596

    
2597
    # shutdown the master IP
2598
    master = self.cfg.GetMasterNode()
2599
    result = self.rpc.call_node_stop_master(master, False)
2600
    result.Raise("Could not disable the master role")
2601

    
2602
    try:
2603
      cluster = self.cfg.GetClusterInfo()
2604
      cluster.cluster_name = clustername
2605
      cluster.master_ip = ip
2606
      self.cfg.Update(cluster, feedback_fn)
2607

    
2608
      # update the known hosts file
2609
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2610
      node_list = self.cfg.GetOnlineNodeList()
2611
      try:
2612
        node_list.remove(master)
2613
      except ValueError:
2614
        pass
2615
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2616
    finally:
2617
      result = self.rpc.call_node_start_master(master, False, False)
2618
      msg = result.fail_msg
2619
      if msg:
2620
        self.LogWarning("Could not re-enable the master role on"
2621
                        " the master, please restart manually: %s", msg)
2622

    
2623
    return clustername
2624

    
2625

    
2626
class LUSetClusterParams(LogicalUnit):
2627
  """Change the parameters of the cluster.
2628

2629
  """
2630
  HPATH = "cluster-modify"
2631
  HTYPE = constants.HTYPE_CLUSTER
2632
  REQ_BGL = False
2633

    
2634
  def CheckArguments(self):
2635
    """Check parameters
2636

2637
    """
2638
    if self.op.uid_pool:
2639
      uidpool.CheckUidPool(self.op.uid_pool)
2640

    
2641
    if self.op.add_uids:
2642
      uidpool.CheckUidPool(self.op.add_uids)
2643

    
2644
    if self.op.remove_uids:
2645
      uidpool.CheckUidPool(self.op.remove_uids)
2646

    
2647
  def ExpandNames(self):
2648
    # FIXME: in the future maybe other cluster params won't require checking on
2649
    # all nodes to be modified.
2650
    self.needed_locks = {
2651
      locking.LEVEL_NODE: locking.ALL_SET,
2652
    }
2653
    self.share_locks[locking.LEVEL_NODE] = 1
2654

    
2655
  def BuildHooksEnv(self):
2656
    """Build hooks env.
2657

2658
    """
2659
    env = {
2660
      "OP_TARGET": self.cfg.GetClusterName(),
2661
      "NEW_VG_NAME": self.op.vg_name,
2662
      }
2663
    mn = self.cfg.GetMasterNode()
2664
    return env, [mn], [mn]
2665

    
2666
  def CheckPrereq(self):
2667
    """Check prerequisites.
2668

2669
    This checks whether the given params don't conflict and
2670
    if the given volume group is valid.
2671

2672
    """
2673
    if self.op.vg_name is not None and not self.op.vg_name:
2674
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2675
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2676
                                   " instances exist", errors.ECODE_INVAL)
2677

    
2678
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2679
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2680
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2681
                                   " drbd-based instances exist",
2682
                                   errors.ECODE_INVAL)
2683

    
2684
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2685

    
2686
    # if vg_name not None, checks given volume group on all nodes
2687
    if self.op.vg_name:
2688
      vglist = self.rpc.call_vg_list(node_list)
2689
      for node in node_list:
2690
        msg = vglist[node].fail_msg
2691
        if msg:
2692
          # ignoring down node
2693
          self.LogWarning("Error while gathering data on node %s"
2694
                          " (ignoring node): %s", node, msg)
2695
          continue
2696
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2697
                                              self.op.vg_name,
2698
                                              constants.MIN_VG_SIZE)
2699
        if vgstatus:
2700
          raise errors.OpPrereqError("Error on node '%s': %s" %
2701
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2702

    
2703
    if self.op.drbd_helper:
2704
      # checks given drbd helper on all nodes
2705
      helpers = self.rpc.call_drbd_helper(node_list)
2706
      for node in node_list:
2707
        ninfo = self.cfg.GetNodeInfo(node)
2708
        if ninfo.offline:
2709
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2710
          continue
2711
        msg = helpers[node].fail_msg
2712
        if msg:
2713
          raise errors.OpPrereqError("Error checking drbd helper on node"
2714
                                     " '%s': %s" % (node, msg),
2715
                                     errors.ECODE_ENVIRON)
2716
        node_helper = helpers[node].payload
2717
        if node_helper != self.op.drbd_helper:
2718
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2719
                                     (node, node_helper), errors.ECODE_ENVIRON)
2720

    
2721
    self.cluster = cluster = self.cfg.GetClusterInfo()
2722
    # validate params changes
2723
    if self.op.beparams:
2724
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2725
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2726

    
2727
    if self.op.ndparams:
2728
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2729
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2730

    
2731
    if self.op.nicparams:
2732
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2733
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2734
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2735
      nic_errors = []
2736

    
2737
      # check all instances for consistency
2738
      for instance in self.cfg.GetAllInstancesInfo().values():
2739
        for nic_idx, nic in enumerate(instance.nics):
2740
          params_copy = copy.deepcopy(nic.nicparams)
2741
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2742

    
2743
          # check parameter syntax
2744
          try:
2745
            objects.NIC.CheckParameterSyntax(params_filled)
2746
          except errors.ConfigurationError, err:
2747
            nic_errors.append("Instance %s, nic/%d: %s" %
2748
                              (instance.name, nic_idx, err))
2749

    
2750
          # if we're moving instances to routed, check that they have an ip
2751
          target_mode = params_filled[constants.NIC_MODE]
2752
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2753
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2754
                              (instance.name, nic_idx))
2755
      if nic_errors:
2756
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2757
                                   "\n".join(nic_errors))
2758

    
2759
    # hypervisor list/parameters
2760
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2761
    if self.op.hvparams:
2762
      for hv_name, hv_dict in self.op.hvparams.items():
2763
        if hv_name not in self.new_hvparams:
2764
          self.new_hvparams[hv_name] = hv_dict
2765
        else:
2766
          self.new_hvparams[hv_name].update(hv_dict)
2767

    
2768
    # os hypervisor parameters
2769
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2770
    if self.op.os_hvp:
2771
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

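    # helper_os applies a list of (constants.DDM_ADD or constants.DDM_REMOVE,
    # os_name) pairs to the named cluster attribute; adding an OS that is
    # already listed (or removing one that is not) is only reported via
    # feedback_fn and otherwise ignored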
    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
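  # Callers in this module include LURedistributeConfig, which simply invokes
  # _RedistributeAncillaryFiles(self) for the online nodes, and LUAddNode,
  # which passes additional_nodes=[node] (plus additional_vm) for a node that
  # is not yet part of the configuration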
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
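  # Returns True when no disk is left degraded.  With oneshot=True the status
  # is only sampled (apart from a few short retries while a transiently
  # degraded state is seen) instead of waiting for the whole resync to finish.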
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)

    if node is None:
      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)

    self.oob_program = _SupportsOob(self.cfg, node)

    if not self.oob_program:
      raise errors.OpPrereqError("OOB is not supported for node %s" %
                                 self.op.node_name)

    if self.op.command == constants.OOB_POWER_OFF and not node.offline:
      raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                  " not marked offline") % self.op.node_name)

    self.node = node

  def ExpandNames(self):
    """Gather locks we need.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.cfg.GetMasterNode()
    node = self.node

    logging.info("Executing out-of-band command '%s' using '%s' on %s",
                 self.op.command, self.oob_program, self.op.node_name)
    result = self.rpc.call_run_oob(master_node, self.oob_program,
                                   self.op.command, self.op.node_name,
                                   self.op.timeout)

    result.Raise("An error occurred on execution of OOB helper")

    self._CheckPayload(result)

    if self.op.command == constants.OOB_HEALTH:
      # For health we should log important events
      for item, status in result.payload:
        if status in [constants.OOB_STATUS_WARNING,
                      constants.OOB_STATUS_CRITICAL]:
          logging.warning("On node '%s' item '%s' has status '%s'",
                          self.op.node_name, item, status)

    if self.op.command == constants.OOB_POWER_ON:
      node.powered = True
    elif self.op.command == constants.OOB_POWER_OFF:
      node.powered = False
    elif self.op.command == constants.OOB_POWER_STATUS:
      powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
      if powered != self.node.powered:
        logging.warning(("Recorded power state (%s) of node '%s' does not match"
                         " actual power state (%s)"), node.powered,
                        self.op.node_name, powered)

    self.cfg.Update(node, feedback_fn)

    return result.payload

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      for item, status in result.payload:
        if status not in constants.OOB_STATUSES:
          errs.append("health item '%s' has invalid status '%s'" %
                      (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))



class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary.

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

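  # Note for Exec() below: hidden, blacklisted or invalid OSes are skipped
  # from the output entirely unless the corresponding output field (_HID,
  # _BLK, _VLD) was explicitly requested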
  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the list of all nodes", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []

    # Gather data as requested
    if query.IQ_LIVE in self.requested_data:
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          live_data.update(result.payload)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)
    names = qlang.ReadSimpleFilter("name", self.op.filter)

    self.impl = qcls(names, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return self.qcls.FieldsQuery(self.op.fields)


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the target node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

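  # For example, per _F2R a node with only master_candidate set maps to
  # _ROLE_CANDIDATE, while a node with none of the three flags set maps to
  # _ROLE_REGULAR; _R2F is simply the inverse mapping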
  def CheckArguments(self):
4191
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4192
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4193
                self.op.master_capable, self.op.vm_capable,
4194
                self.op.secondary_ip, self.op.ndparams]
4195
    if all_mods.count(None) == len(all_mods):
4196
      raise errors.OpPrereqError("Please pass at least one modification",
4197
                                 errors.ECODE_INVAL)
4198
    if all_mods.count(True) > 1:
4199
      raise errors.OpPrereqError("Can't set the node into more than one"
4200
                                 " state at the same time",
4201
                                 errors.ECODE_INVAL)
4202

    
4203
    # Boolean value that tells us whether we might be demoting from MC
4204
    self.might_demote = (self.op.master_candidate == False or
4205
                         self.op.offline == True or
4206
                         self.op.drained == True or
4207
                         self.op.master_capable == False)
4208

    
4209
    if self.op.secondary_ip:
4210
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4211
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4212
                                   " address" % self.op.secondary_ip,
4213
                                   errors.ECODE_INVAL)
4214

    
4215
    self.lock_all = self.op.auto_promote and self.might_demote
4216
    self.lock_instances = self.op.secondary_ip is not None
4217

    
4218
  def ExpandNames(self):
4219
    if self.lock_all:
4220
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4221
    else:
4222
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4223

    
4224
    if self.lock_instances:
4225
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4226

    
4227
  def DeclareLocks(self, level):
4228
    # If we have locked all instances, before waiting to lock nodes, release
4229
    # all the ones living on nodes unrelated to the current operation.
4230
    if level == locking.LEVEL_NODE and self.lock_instances:
4231
      instances_release = []
4232
      instances_keep = []
4233
      self.affected_instances = []
4234
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4235
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4236
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4237
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4238
          if i_mirrored and self.op.node_name in instance.all_nodes:
4239
            instances_keep.append(instance_name)
4240
            self.affected_instances.append(instance)
4241
          else:
4242
            instances_release.append(instance_name)
4243
        if instances_release:
4244
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4245
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4246

    
4247
  def BuildHooksEnv(self):
4248
    """Build hooks env.
4249

4250
    This runs on the master node.
4251

4252
    """
4253
    env = {
4254
      "OP_TARGET": self.op.node_name,
4255
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4256
      "OFFLINE": str(self.op.offline),
4257
      "DRAINED": str(self.op.drained),
4258
      "MASTER_CAPABLE": str(self.op.master_capable),
4259
      "VM_CAPABLE": str(self.op.vm_capable),
4260
      }
4261
    nl = [self.cfg.GetMasterNode(),
4262
          self.op.node_name]
4263
    return env, nl, nl
4264

    
4265
  def CheckPrereq(self):
4266
    """Check prerequisites.
4267

4268
    This checks the node's state against the requested modifications.
4269

4270
    """
4271
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4272

    
4273
    if (self.op.master_candidate is not None or
4274
        self.op.drained is not None or
4275
        self.op.offline is not None):
4276
      # we can't change the master's node flags
4277
      if self.op.node_name == self.cfg.GetMasterNode():
4278
        raise errors.OpPrereqError("The master role can be changed"
4279
                                   " only via master-failover",
4280
                                   errors.ECODE_INVAL)
4281

    
4282
    if self.op.master_candidate and not node.master_capable:
4283
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4284
                                 " it a master candidate" % node.name,
4285
                                 errors.ECODE_STATE)
4286

    
4287
    if self.op.vm_capable == False:
4288
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4289
      if ipri or isec:
4290
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4291
                                   " the vm_capable flag" % node.name,
4292
                                   errors.ECODE_STATE)
4293

    
4294
    if node.master_candidate and self.might_demote and not self.lock_all:
4295
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
4296
      # check if after removing the current node, we're missing master
4297
      # candidates
4298
      (mc_remaining, mc_should, _) = \
4299
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4300
      if mc_remaining < mc_should:
4301
        raise errors.OpPrereqError("Not enough master candidates, please"
4302
                                   " pass auto_promote to allow promotion",
4303
                                   errors.ECODE_STATE)
4304

    
4305
    self.old_flags = old_flags = (node.master_candidate,
4306
                                  node.drained, node.offline)
4307
    assert old_flags in self._F2R, "Unhandled old flags %s" % str(old_flags)
4308
    self.old_role = old_role = self._F2R[old_flags]
4309

    
4310
    # Check for ineffective changes
4311
    for attr in self._FLAGS:
4312
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4313
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4314
        setattr(self.op, attr, None)
4315

    
4316
    # Past this point, any flag change to False means a transition
4317
    # away from the respective state, as only real changes are kept
4318

    
4319
    # TODO: We might query the real power state if it supports OOB
4320
    if _SupportsOob(self.cfg, node):
4321
      if self.op.offline is False and not (node.powered or
4322
                                           self.op.powered == True):
4323
        raise errors.OpPrereqError(("Please power on node %s first before you"
4324
                                    " can reset offline state") %
4325
                                   self.op.node_name)
4326
    elif self.op.powered is not None:
4327
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4328
                                  " which does not support out-of-band"
4329
                                  " handling") % self.op.node_name)
4330

    
4331
    # If we're being deofflined/drained, we'll MC ourself if needed
4332
    if (self.op.drained == False or self.op.offline == False or
4333
        (self.op.master_capable and not node.master_capable)):
4334
      if _DecideSelfPromotion(self):
4335
        self.op.master_candidate = True
4336
        self.LogInfo("Auto-promoting node to master candidate")
4337

    
4338
    # If we're no longer master capable, we'll demote ourselves from MC
4339
    if self.op.master_capable == False and node.master_candidate:
4340
      self.LogInfo("Demoting from master candidate")
4341
      self.op.master_candidate = False
4342

    
4343
    # Compute new role
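    # At most one of the role flags can still be True here (see the assert
    # below); for example, a request setting only drained=True yields
    # _ROLE_DRAINED, while unsetting the single True flag yields
    # _ROLE_REGULAR.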
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4345
    if self.op.master_candidate:
4346
      new_role = self._ROLE_CANDIDATE
4347
    elif self.op.drained:
4348
      new_role = self._ROLE_DRAINED
4349
    elif self.op.offline:
4350
      new_role = self._ROLE_OFFLINE
4351
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4352
      # False is still in new flags, which means we're un-setting (the
4353
      # only) True flag
4354
      new_role = self._ROLE_REGULAR
4355
    else: # no new flags, nothing, keep old role
4356
      new_role = old_role
4357

    
4358
    self.new_role = new_role
4359

    
4360
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4361
      # Trying to transition out of offline status
4362
      result = self.rpc.call_version([node.name])[node.name]
4363
      if result.fail_msg:
4364
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4365
                                   " to report its version: %s" %
4366
                                   (node.name, result.fail_msg),
4367
                                   errors.ECODE_STATE)
4368
      else:
4369
        self.LogWarning("Transitioning node from offline to online state"
4370
                        " without using re-add. Please make sure the node"
4371
                        " is healthy!")
4372

    
4373
    if self.op.secondary_ip:
4374
      # Ok even without locking, because this can't be changed by any LU
4375
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4376
      master_singlehomed = master.secondary_ip == master.primary_ip
4377
      if master_singlehomed and self.op.secondary_ip:
4378
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4379
                                   " homed cluster", errors.ECODE_INVAL)
4380

    
4381
      if node.offline:
4382
        if self.affected_instances:
4383
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4384
                                     " node has instances (%s) configured"
4385
                                     " to use it" % self.affected_instances)
4386
      else:
4387
        # On online nodes, check that no instances are running, and that
4388
        # the node has the new ip and we can reach it.
4389
        for instance in self.affected_instances:
4390
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4391

    
4392
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4393
        if master.name != node.name:
4394
          # check reachability from master secondary ip to new secondary ip
4395
          if not netutils.TcpPing(self.op.secondary_ip,
4396
                                  constants.DEFAULT_NODED_PORT,
4397
                                  source=master.secondary_ip):
4398
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4399
                                       " based ping to node daemon port",
4400
                                       errors.ECODE_ENVIRON)
4401

    
4402
    if self.op.ndparams:
4403
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4404
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4405
      self.new_ndparams = new_ndparams
4406

    
4407
  def Exec(self, feedback_fn):
4408
    """Modifies a node.
4409

4410
    """
4411
    node = self.node
4412
    old_role = self.old_role
4413
    new_role = self.new_role
4414

    
4415
    result = []
4416

    
4417
    if self.op.ndparams:
4418
      node.ndparams = self.new_ndparams
4419

    
4420
    if self.op.powered is not None:
4421
      node.powered = self.op.powered
4422

    
4423
    for attr in ["master_capable", "vm_capable"]:
4424
      val = getattr(self.op, attr)
4425
      if val is not None:
4426
        setattr(node, attr, val)
4427
        result.append((attr, str(val)))
4428

    
4429
    if new_role != old_role:
4430
      # Tell the node to demote itself, if no longer MC and not offline
4431
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4432
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4433
        if msg:
4434
          self.LogWarning("Node failed to demote itself: %s", msg)
4435

    
4436
      new_flags = self._R2F[new_role]
4437
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4438
        if of != nf:
4439
          result.append((desc, str(nf)))
4440
      (node.master_candidate, node.drained, node.offline) = new_flags
4441

    
4442
      # we locked all nodes, so we adjust the candidate pool before updating
      # this node
4443
      if self.lock_all:
4444
        _AdjustCandidatePool(self, [node.name])
4445

    
4446
    if self.op.secondary_ip:
4447
      node.secondary_ip = self.op.secondary_ip
4448
      result.append(("secondary_ip", self.op.secondary_ip))
4449

    
4450
    # this will trigger configuration file update, if needed
4451
    self.cfg.Update(node, feedback_fn)
4452

    
4453
    # this will trigger job queue propagation or cleanup if the mc
4454
    # flag changed
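    # the count() test fires exactly when the node enters or leaves the
    # master candidate role, i.e. when only one of old_role/new_role is
    # _ROLE_CANDIDATE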
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4456
      self.context.ReaddNode(node)
4457

    
4458
    return result
4459

    
4460

    
4461
class LUPowercycleNode(NoHooksLU):
4462
  """Powercycles a node.
4463

4464
  """
4465
  REQ_BGL = False
4466

    
4467
  def CheckArguments(self):
4468
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4469
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4470
      raise errors.OpPrereqError("The node is the master and the force"
4471
                                 " parameter was not set",
4472
                                 errors.ECODE_INVAL)
4473

    
4474
  def ExpandNames(self):
4475
    """Locking for PowercycleNode.
4476

4477
    This is a last-resort option and shouldn't block on other
4478
    jobs. Therefore, we grab no locks.
4479

4480
    """
4481
    self.needed_locks = {}
4482

    
4483
  def Exec(self, feedback_fn):
4484
    """Reboots a node.
4485

4486
    """
4487
    result = self.rpc.call_node_powercycle(self.op.node_name,
4488
                                           self.cfg.GetHypervisorType())
4489
    result.Raise("Failed to schedule the reboot")
4490
    return result.payload
4491

    
4492

    
4493
class LUQueryClusterInfo(NoHooksLU):
4494
  """Query cluster configuration.
4495

4496
  """
4497
  REQ_BGL = False
4498

    
4499
  def ExpandNames(self):
4500
    self.needed_locks = {}
4501

    
4502
  def Exec(self, feedback_fn):
4503
    """Return cluster config.
4504

4505
    """
4506
    cluster = self.cfg.GetClusterInfo()
4507
    os_hvp = {}
4508

    
4509
    # Filter just for enabled hypervisors
4510
    for os_name, hv_dict in cluster.os_hvp.items():
4511
      os_hvp[os_name] = {}
4512
      for hv_name, hv_params in hv_dict.items():
4513
        if hv_name in cluster.enabled_hypervisors:
4514
          os_hvp[os_name][hv_name] = hv_params
4515

    
4516
    # Convert ip_family to ip_version
4517
    primary_ip_version = constants.IP4_VERSION
4518
    if cluster.primary_ip_family == netutils.IP6Address.family:
4519
      primary_ip_version = constants.IP6_VERSION
4520

    
4521
    result = {
4522
      "software_version": constants.RELEASE_VERSION,
4523
      "protocol_version": constants.PROTOCOL_VERSION,
4524
      "config_version": constants.CONFIG_VERSION,
4525
      "os_api_version": max(constants.OS_API_VERSIONS),
4526
      "export_version": constants.EXPORT_VERSION,
4527
      "architecture": (platform.architecture()[0], platform.machine()),
4528
      "name": cluster.cluster_name,
4529
      "master": cluster.master_node,
4530
      "default_hypervisor": cluster.enabled_hypervisors[0],
4531
      "enabled_hypervisors": cluster.enabled_hypervisors,
4532
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4533
                        for hypervisor_name in cluster.enabled_hypervisors]),
4534
      "os_hvp": os_hvp,
4535
      "beparams": cluster.beparams,
4536
      "osparams": cluster.osparams,
4537
      "nicparams": cluster.nicparams,
4538
      "candidate_pool_size": cluster.candidate_pool_size,
4539
      "master_netdev": cluster.master_netdev,
4540
      "volume_group_name": cluster.volume_group_name,
4541
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4542
      "file_storage_dir": cluster.file_storage_dir,
4543
      "maintain_node_health": cluster.maintain_node_health,
4544
      "ctime": cluster.ctime,
4545
      "mtime": cluster.mtime,
4546
      "uuid": cluster.uuid,
4547
      "tags": list(cluster.GetTags()),
4548
      "uid_pool": cluster.uid_pool,
4549
      "default_iallocator": cluster.default_iallocator,
4550
      "reserved_lvs": cluster.reserved_lvs,
4551
      "primary_ip_version": primary_ip_version,
4552
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4553
      }
4554

    
4555
    return result
4556

    
4557

    
4558
class LUQueryConfigValues(NoHooksLU):
4559
  """Return configuration values.
4560

4561
  """
4562
  REQ_BGL = False
4563
  _FIELDS_DYNAMIC = utils.FieldSet()
4564
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4565
                                  "watcher_pause", "volume_group_name")
4566

    
4567
  def CheckArguments(self):
4568
    _CheckOutputFields(static=self._FIELDS_STATIC,
4569
                       dynamic=self._FIELDS_DYNAMIC,
4570
                       selected=self.op.output_fields)
4571

    
4572
  def ExpandNames(self):
4573
    self.needed_locks = {}
4574

    
4575
  def Exec(self, feedback_fn):
    """Return the values of the requested configuration fields.
4577

4578
    """
4579
    values = []
4580
    for field in self.op.output_fields:
4581
      if field == "cluster_name":
4582
        entry = self.cfg.GetClusterName()
4583
      elif field == "master_node":
4584
        entry = self.cfg.GetMasterNode()
4585
      elif field == "drain_flag":
4586
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4587
      elif field == "watcher_pause":
4588
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4589
      elif field == "volume_group_name":
4590
        entry = self.cfg.GetVGName()
4591
      else:
4592
        raise errors.ParameterError(field)
4593
      values.append(entry)
4594
    return values
4595

    
4596

    
4597
class LUActivateInstanceDisks(NoHooksLU):
4598
  """Bring up an instance's disks.
4599

4600
  """
4601
  REQ_BGL = False
4602

    
4603
  def ExpandNames(self):
4604
    self._ExpandAndLockInstance()
4605
    self.needed_locks[locking.LEVEL_NODE] = []
4606
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4607

    
4608
  def DeclareLocks(self, level):
4609
    if level == locking.LEVEL_NODE:
4610
      self._LockInstancesNodes()
4611

    
4612
  def CheckPrereq(self):
4613
    """Check prerequisites.
4614

4615
    This checks that the instance is in the cluster.
4616

4617
    """
4618
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4619
    assert self.instance is not None, \
4620
      "Cannot retrieve locked instance %s" % self.op.instance_name
4621
    _CheckNodeOnline(self, self.instance.primary_node)
4622

    
4623
  def Exec(self, feedback_fn):
4624
    """Activate the disks.
4625

4626
    """
4627
    disks_ok, disks_info = \
4628
              _AssembleInstanceDisks(self, self.instance,
4629
                                     ignore_size=self.op.ignore_size)
4630
    if not disks_ok:
4631
      raise errors.OpExecError("Cannot activate block devices")
4632

    
4633
    return disks_info
4634

    
4635

    
4636
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4637
                           ignore_size=False):
4638
  """Prepare the block devices for an instance.
4639

4640
  This sets up the block devices on all nodes.
4641

4642
  @type lu: L{LogicalUnit}
4643
  @param lu: the logical unit on whose behalf we execute
4644
  @type instance: L{objects.Instance}
4645
  @param instance: the instance for whose disks we assemble
4646
  @type disks: list of L{objects.Disk} or None
4647
  @param disks: which disks to assemble (or all, if None)
4648
  @type ignore_secondaries: boolean
4649
  @param ignore_secondaries: if true, errors on secondary nodes
4650
      won't result in an error return from the function
4651
  @type ignore_size: boolean
4652
  @param ignore_size: if true, the current known size of the disk
4653
      will not be used during the disk activation, useful for cases
4654
      when the size is wrong
4655
  @return: a tuple of (disks_ok, device_info); device_info is a list of
      (host, instance_visible_name, node_visible_name) triples with the
      mapping from node devices to instance devices
4658

4659
  """
4660
  device_info = []
4661
  disks_ok = True
4662
  iname = instance.name
4663
  disks = _ExpandCheckDisks(instance, disks)
4664

    
4665
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking has occurred, but we do not eliminate it
4668

    
4669
  # The proper fix would be to wait (with some limits) until the
4670
  # connection has been made and drbd transitions from WFConnection
4671
  # into any other network-connected state (Connected, SyncTarget,
4672
  # SyncSource, etc.)
4673

    
4674
  # 1st pass, assemble on all nodes in secondary mode
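  # For each disk, ComputeNodeTree yields the (node, device subtree) pairs
  # that need to be assembled; for mirrored templates such as DRBD this
  # covers the secondary node(s) as well as the primary.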
  for inst_disk in disks:
4676
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4677
      if ignore_size:
4678
        node_disk = node_disk.Copy()
4679
        node_disk.UnsetSize()
4680
      lu.cfg.SetDiskID(node_disk, node)
4681
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4682
      msg = result.fail_msg
4683
      if msg:
4684
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4685
                           " (is_primary=False, pass=1): %s",
4686
                           inst_disk.iv_name, node, msg)
4687
        if not ignore_secondaries:
4688
          disks_ok = False
4689

    
4690
  # FIXME: race condition on drbd migration to primary
4691

    
4692
  # 2nd pass, do only the primary node
4693
  for inst_disk in disks:
4694
    dev_path = None
4695

    
4696
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4697
      if node != instance.primary_node:
4698
        continue
4699
      if ignore_size:
4700
        node_disk = node_disk.Copy()
4701
        node_disk.UnsetSize()
4702
      lu.cfg.SetDiskID(node_disk, node)
4703
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4704
      msg = result.fail_msg
4705
      if msg:
4706
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4707
                           " (is_primary=True, pass=2): %s",
4708
                           inst_disk.iv_name, node, msg)
4709
        disks_ok = False
4710
      else:
4711
        dev_path = result.payload
4712

    
4713
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4714

    
4715
  # leave the disks configured for the primary node
4716
  # this is a workaround that would be fixed better by
4717
  # improving the logical/physical id handling
4718
  for disk in disks:
4719
    lu.cfg.SetDiskID(disk, instance.primary_node)
4720

    
4721
  return disks_ok, device_info
4722

    
4723

    
4724
def _StartInstanceDisks(lu, instance, force):
4725
  """Start the disks of an instance.
4726

4727
  """
4728
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4729
                                           ignore_secondaries=force)
4730
  if not disks_ok:
4731
    _ShutdownInstanceDisks(lu, instance)
4732
    if force is not None and not force:
4733
      lu.proc.LogWarning("", hint="If the message above refers to a"
4734
                         " secondary node,"
4735
                         " you can retry the operation using '--force'.")
4736
    raise errors.OpExecError("Disk consistency error")
4737

    
4738

    
4739
class LUDeactivateInstanceDisks(NoHooksLU):
4740
  """Shutdown an instance's disks.
4741

4742
  """
4743
  REQ_BGL = False
4744

    
4745
  def ExpandNames(self):
4746
    self._ExpandAndLockInstance()
4747
    self.needed_locks[locking.LEVEL_NODE] = []
4748
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4749

    
4750
  def DeclareLocks(self, level):
4751
    if level == locking.LEVEL_NODE:
4752
      self._LockInstancesNodes()
4753

    
4754
  def CheckPrereq(self):
4755
    """Check prerequisites.
4756

4757
    This checks that the instance is in the cluster.
4758

4759
    """
4760
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4761
    assert self.instance is not None, \
4762
      "Cannot retrieve locked instance %s" % self.op.instance_name
4763

    
4764
  def Exec(self, feedback_fn):
    """Deactivate the disks.
4766

4767
    """
4768
    instance = self.instance
4769
    _SafeShutdownInstanceDisks(self, instance)
4770

    
4771

    
4772
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4773
  """Shutdown block devices of an instance.
4774

4775
  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.
4777

4778
  """
4779
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4780
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4781

    
4782

    
4783
def _ExpandCheckDisks(instance, disks):
4784
  """Return the instance disks selected by the disks list
4785

4786
  @type disks: list of L{objects.Disk} or None
4787
  @param disks: selected disks
4788
  @rtype: list of L{objects.Disk}
4789
  @return: selected instance disks to act on
4790

4791
  """
4792
  if disks is None:
4793
    return instance.disks
4794
  else:
4795
    if not set(disks).issubset(instance.disks):
4796
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4797
                                   " target instance")
4798
    return disks
4799

    
4800

    
4801
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4802
  """Shutdown block devices of an instance.
4803

4804
  This does the shutdown on all nodes of the instance.
4805

4806
  Errors on the primary node are ignored only if ignore_primary is
  true; errors on offline secondary nodes are always ignored.
4808

4809
  """
4810
  all_result = True
4811
  disks = _ExpandCheckDisks(instance, disks)
4812

    
4813
  for disk in disks:
4814
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4815
      lu.cfg.SetDiskID(top_disk, node)
4816
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4817
      msg = result.fail_msg
4818
      if msg:
4819
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4820
                      disk.iv_name, node, msg)
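        # count the failure unless it happened on the primary node and
        # ignore_primary is set, or on a secondary node that is known to
        # be offline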
        if ((node == instance.primary_node and not ignore_primary) or
4822
            (node != instance.primary_node and not result.offline)):
4823
          all_result = False
4824
  return all_result
4825

    
4826

    
4827
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4828
  """Checks if a node has enough free memory.
4829

4830
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4834

4835
  @type lu: C{LogicalUnit}
4836
  @param lu: a logical unit from which we get configuration data
4837
  @type node: C{str}
4838
  @param node: the node to check
4839
  @type reason: C{str}
4840
  @param reason: string to use in the error message
4841
  @type requested: C{int}
4842
  @param requested: the amount of memory in MiB to check for
4843
  @type hypervisor_name: C{str}
4844
  @param hypervisor_name: the hypervisor to ask for memory stats
4845
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4846
      we cannot check the node
4847

4848
  """
4849
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4850
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4851
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4852
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4853
  if not isinstance(free_mem, int):
4854
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4855
                               " was '%s'" % (node, free_mem),
4856
                               errors.ECODE_ENVIRON)
4857
  if requested > free_mem:
4858
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4859
                               " needed %s MiB, available %s MiB" %
4860
                               (node, reason, requested, free_mem),
4861
                               errors.ECODE_NORES)
4862

    
4863

    
4864
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4871

4872
  @type lu: C{LogicalUnit}
4873
  @param lu: a logical unit from which we get configuration data
4874
  @type nodenames: C{list}
4875
  @param nodenames: the list of node names to check
4876
  @type req_sizes: C{dict}
4877
  @param req_sizes: the hash of vg and corresponding amount of disk in
4878
      MiB to check for
4879
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4880
      or we cannot check the node
4881

4882
  """
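  # req_sizes maps volume group names to the space required in them, for
  # example {"my-vg": 10240} would request 10 GiB in a group named "my-vg"
  # (the group name here is purely illustrative)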
  if req_sizes is not None:
4884
    for vg, req_size in req_sizes.iteritems():
4885
      _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
4886

    
4887

    
4888
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4889
  """Checks if nodes have enough free disk space in the specified VG.
4890

4891
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4895

4896
  @type lu: C{LogicalUnit}
4897
  @param lu: a logical unit from which we get configuration data
4898
  @type nodenames: C{list}
4899
  @param nodenames: the list of node names to check
4900
  @type vg: C{str}
4901
  @param vg: the volume group to check
4902
  @type requested: C{int}
4903
  @param requested: the amount of disk in MiB to check for
4904
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4905
      or we cannot check the node
4906

4907
  """
4908
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
4909
  for node in nodenames:
4910
    info = nodeinfo[node]
4911
    info.Raise("Cannot get current information from node %s" % node,
4912
               prereq=True, ecode=errors.ECODE_ENVIRON)
4913
    vg_free = info.payload.get("vg_free", None)
4914
    if not isinstance(vg_free, int):
4915
      raise errors.OpPrereqError("Can't compute free disk space on node"
4916
                                 " %s for vg %s, result was '%s'" %
4917
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
4918
    if requested > vg_free:
4919
      raise errors.OpPrereqError("Not enough disk space on target node %s"
4920
                                 " vg %s: required %d MiB, available %d MiB" %
4921
                                 (node, vg, requested, vg_free),
4922
                                 errors.ECODE_NORES)
4923

    
4924

    
4925
class LUStartupInstance(LogicalUnit):
4926
  """Starts an instance.
4927

4928
  """
4929
  HPATH = "instance-start"
4930
  HTYPE = constants.HTYPE_INSTANCE
4931
  REQ_BGL = False
4932

    
4933
  def CheckArguments(self):
4934
    # extra beparams
4935
    if self.op.beparams:
4936
      # fill the beparams dict
4937
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4938

    
4939
  def ExpandNames(self):
4940
    self._ExpandAndLockInstance()
4941

    
4942
  def BuildHooksEnv(self):
4943
    """Build hooks env.
4944

4945
    This runs on master, primary and secondary nodes of the instance.
4946

4947
    """
4948
    env = {
4949
      "FORCE": self.op.force,
4950
      }
4951
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4952
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4953
    return env, nl, nl
4954

    
4955
  def CheckPrereq(self):
4956
    """Check prerequisites.
4957

4958
    This checks that the instance is in the cluster.
4959

4960
    """
4961
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4962
    assert self.instance is not None, \
4963
      "Cannot retrieve locked instance %s" % self.op.instance_name
4964

    
4965
    # extra hvparams
4966
    if self.op.hvparams:
4967
      # check hypervisor parameter syntax (locally)
4968
      cluster = self.cfg.GetClusterInfo()
4969
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4970
      filled_hvp = cluster.FillHV(instance)
4971
      filled_hvp.update(self.op.hvparams)
4972
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4973
      hv_type.CheckParameterSyntax(filled_hvp)
4974
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4975

    
4976
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4977

    
4978
    if self.primary_offline and self.op.ignore_offline_nodes:
4979
      self.proc.LogWarning("Ignoring offline primary node")
4980

    
4981
      if self.op.hvparams or self.op.beparams:
4982
        self.proc.LogWarning("Overridden parameters are ignored")
4983
    else:
4984
      _CheckNodeOnline(self, instance.primary_node)
4985

    
4986
      bep = self.cfg.GetClusterInfo().FillBE(instance)
4987

    
4988
      # check bridges existence
4989
      _CheckInstanceBridgesExist(self, instance)
4990

    
4991
      remote_info = self.rpc.call_instance_info(instance.primary_node,
4992
                                                instance.name,
4993
                                                instance.hypervisor)
4994
      remote_info.Raise("Error checking node %s" % instance.primary_node,
4995
                        prereq=True, ecode=errors.ECODE_ENVIRON)
4996
      if not remote_info.payload: # not running already
4997
        _CheckNodeFreeMemory(self, instance.primary_node,
4998
                             "starting instance %s" % instance.name,
4999
                             bep[constants.BE_MEMORY], instance.hypervisor)
5000

    
5001
  def Exec(self, feedback_fn):
5002
    """Start the instance.
5003

5004
    """
5005
    instance = self.instance
5006
    force = self.op.force
5007

    
5008
    self.cfg.MarkInstanceUp(instance.name)
5009

    
5010
    if self.primary_offline:
5011
      assert self.op.ignore_offline_nodes
5012
      self.proc.LogInfo("Primary node offline, marked instance as started")
5013
    else:
5014
      node_current = instance.primary_node
5015

    
5016
      _StartInstanceDisks(self, instance, force)
5017

    
5018
      result = self.rpc.call_instance_start(node_current, instance,
5019
                                            self.op.hvparams, self.op.beparams)
5020
      msg = result.fail_msg
5021
      if msg:
5022
        _ShutdownInstanceDisks(self, instance)
5023
        raise errors.OpExecError("Could not start instance: %s" % msg)
5024

    
5025

    
5026
class LURebootInstance(LogicalUnit):
5027
  """Reboot an instance.
5028

5029
  """
5030
  HPATH = "instance-reboot"
5031
  HTYPE = constants.HTYPE_INSTANCE
5032
  REQ_BGL = False
5033

    
5034
  def ExpandNames(self):
5035
    self._ExpandAndLockInstance()
5036

    
5037
  def BuildHooksEnv(self):
5038
    """Build hooks env.
5039

5040
    This runs on master, primary and secondary nodes of the instance.
5041

5042
    """
5043
    env = {
5044
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5045
      "REBOOT_TYPE": self.op.reboot_type,
5046
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5047
      }
5048
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5049
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5050
    return env, nl, nl
5051

    
5052
  def CheckPrereq(self):
5053
    """Check prerequisites.
5054

5055
    This checks that the instance is in the cluster.
5056

5057
    """
5058
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5059
    assert self.instance is not None, \
5060
      "Cannot retrieve locked instance %s" % self.op.instance_name
5061

    
5062
    _CheckNodeOnline(self, instance.primary_node)
5063

    
5064
    # check bridges existence
5065
    _CheckInstanceBridgesExist(self, instance)
5066

    
5067
  def Exec(self, feedback_fn):
5068
    """Reboot the instance.
5069

5070
    """
5071
    instance = self.instance
5072
    ignore_secondaries = self.op.ignore_secondaries
5073
    reboot_type = self.op.reboot_type
5074

    
5075
    node_current = instance.primary_node
5076

    
5077
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5078
                       constants.INSTANCE_REBOOT_HARD]:
5079
      for disk in instance.disks:
5080
        self.cfg.SetDiskID(disk, node_current)
5081
      result = self.rpc.call_instance_reboot(node_current, instance,
5082
                                             reboot_type,
5083
                                             self.op.shutdown_timeout)
5084
      result.Raise("Could not reboot instance")
5085
    else:
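      # full reboot: shut the instance down completely, re-assemble its
      # disks and start it again on the same (primary) node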
      result = self.rpc.call_instance_shutdown(node_current, instance,
5087
                                               self.op.shutdown_timeout)
5088
      result.Raise("Could not shutdown instance for full reboot")
5089
      _ShutdownInstanceDisks(self, instance)
5090
      _StartInstanceDisks(self, instance, ignore_secondaries)
5091
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5092
      msg = result.fail_msg
5093
      if msg:
5094
        _ShutdownInstanceDisks(self, instance)
5095
        raise errors.OpExecError("Could not start instance for"
5096
                                 " full reboot: %s" % msg)
5097

    
5098
    self.cfg.MarkInstanceUp(instance.name)
5099

    
5100

    
5101
class LUShutdownInstance(LogicalUnit):
5102
  """Shutdown an instance.
5103

5104
  """
5105
  HPATH = "instance-stop"
5106
  HTYPE = constants.HTYPE_INSTANCE
5107
  REQ_BGL = False
5108

    
5109
  def ExpandNames(self):
5110
    self._ExpandAndLockInstance()
5111

    
5112
  def BuildHooksEnv(self):
5113
    """Build hooks env.
5114

5115
    This runs on master, primary and secondary nodes of the instance.
5116

5117
    """
5118
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5119
    env["TIMEOUT"] = self.op.timeout
5120
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5121
    return env, nl, nl
5122

    
5123
  def CheckPrereq(self):
5124
    """Check prerequisites.
5125

5126
    This checks that the instance is in the cluster.
5127

5128
    """
5129
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5130
    assert self.instance is not None, \
5131
      "Cannot retrieve locked instance %s" % self.op.instance_name
5132

    
5133
    self.primary_offline = \
5134
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5135

    
5136
    if self.primary_offline and self.op.ignore_offline_nodes:
5137
      self.proc.LogWarning("Ignoring offline primary node")
5138
    else:
5139
      _CheckNodeOnline(self, self.instance.primary_node)
5140

    
5141
  def Exec(self, feedback_fn):
5142
    """Shutdown the instance.
5143

5144
    """
5145
    instance = self.instance
5146
    node_current = instance.primary_node
5147
    timeout = self.op.timeout
5148

    
5149
    self.cfg.MarkInstanceDown(instance.name)
5150

    
5151
    if self.primary_offline:
5152
      assert self.op.ignore_offline_nodes
5153
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5154
    else:
5155
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5156
      msg = result.fail_msg
5157
      if msg:
5158
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5159

    
5160
      _ShutdownInstanceDisks(self, instance)
5161

    
5162

    
5163
class LUReinstallInstance(LogicalUnit):
5164
  """Reinstall an instance.
5165

5166
  """
5167
  HPATH = "instance-reinstall"
5168
  HTYPE = constants.HTYPE_INSTANCE
5169
  REQ_BGL = False
5170

    
5171
  def ExpandNames(self):
5172
    self._ExpandAndLockInstance()
5173

    
5174
  def BuildHooksEnv(self):
5175
    """Build hooks env.
5176

5177
    This runs on master, primary and secondary nodes of the instance.
5178

5179
    """
5180
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5181
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5182
    return env, nl, nl
5183

    
5184
  def CheckPrereq(self):
5185
    """Check prerequisites.
5186

5187
    This checks that the instance is in the cluster and is not running.
5188

5189
    """
5190
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5191
    assert instance is not None, \
5192
      "Cannot retrieve locked instance %s" % self.op.instance_name
5193
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5194
                     " offline, cannot reinstall")
5195
    for node in instance.secondary_nodes:
5196
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5197
                       " cannot reinstall")
5198

    
5199
    if instance.disk_template == constants.DT_DISKLESS:
5200
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5201
                                 self.op.instance_name,
5202
                                 errors.ECODE_INVAL)
5203
    _CheckInstanceDown(self, instance, "cannot reinstall")
5204

    
5205
    if self.op.os_type is not None:
5206
      # OS verification
5207
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5208
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5209
      instance_os = self.op.os_type
5210
    else:
5211
      instance_os = instance.os
5212

    
5213
    nodelist = list(instance.all_nodes)
5214

    
5215
    if self.op.osparams:
5216
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5217
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5218
      self.os_inst = i_osdict # the new dict (without defaults)
5219
    else:
5220
      self.os_inst = None
5221

    
5222
    self.instance = instance
5223

    
5224
  def Exec(self, feedback_fn):
5225
    """Reinstall the instance.
5226

5227
    """
5228
    inst = self.instance
5229

    
5230
    if self.op.os_type is not None:
5231
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5232
      inst.os = self.op.os_type
5233
      # Write to configuration
5234
      self.cfg.Update(inst, feedback_fn)
5235

    
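    # the OS create scripts need access to the instance's disks, so they
    # are activated here and shut down again once the scripts have run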
    _StartInstanceDisks(self, inst, None)
5237
    try:
5238
      feedback_fn("Running the instance OS create scripts...")
5239
      # FIXME: pass debug option from opcode to backend
5240
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5241
                                             self.op.debug_level,
5242
                                             osparams=self.os_inst)
5243
      result.Raise("Could not install OS for instance %s on node %s" %
5244
                   (inst.name, inst.primary_node))
5245
    finally:
5246
      _ShutdownInstanceDisks(self, inst)
5247

    
5248

    
5249
class LURecreateInstanceDisks(LogicalUnit):
5250
  """Recreate an instance's missing disks.
5251

5252
  """
5253
  HPATH = "instance-recreate-disks"
5254
  HTYPE = constants.HTYPE_INSTANCE
5255
  REQ_BGL = False
5256

    
5257
  def ExpandNames(self):
5258
    self._ExpandAndLockInstance()
5259

    
5260
  def BuildHooksEnv(self):
5261
    """Build hooks env.
5262

5263
    This runs on master, primary and secondary nodes of the instance.
5264

5265
    """
5266
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5267
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5268
    return env, nl, nl
5269

    
5270
  def CheckPrereq(self):
5271
    """Check prerequisites.
5272

5273
    This checks that the instance is in the cluster and is not running.
5274

5275
    """
5276
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5277
    assert instance is not None, \
5278
      "Cannot retrieve locked instance %s" % self.op.instance_name
5279
    _CheckNodeOnline(self, instance.primary_node)
5280

    
5281
    if instance.disk_template == constants.DT_DISKLESS:
5282
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5283
                                 self.op.instance_name, errors.ECODE_INVAL)
5284
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5285

    
5286
    if not self.op.disks:
5287
      self.op.disks = range(len(instance.disks))
5288
    else:
5289
      for idx in self.op.disks:
5290
        if idx >= len(instance.disks):
5291
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5292
                                     errors.ECODE_INVAL)
5293

    
5294
    self.instance = instance
5295

    
5296
  def Exec(self, feedback_fn):
5297
    """Recreate the disks.
5298

5299
    """
5300
    to_skip = []
5301
    for idx, _ in enumerate(self.instance.disks):
5302
      if idx not in self.op.disks: # disk idx has not been passed in
5303
        to_skip.append(idx)
5304
        continue
5305

    
5306
    _CreateDisks(self, self.instance, to_skip=to_skip)
5307

    
5308

    
5309
class LURenameInstance(LogicalUnit):
5310
  """Rename an instance.
5311

5312
  """
5313
  HPATH = "instance-rename"
5314
  HTYPE = constants.HTYPE_INSTANCE
5315

    
5316
  def CheckArguments(self):
5317
    """Check arguments.
5318

5319
    """
5320
    if self.op.ip_check and not self.op.name_check:
5321
      # TODO: make the ip check more flexible and not depend on the name check
5322
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5323
                                 errors.ECODE_INVAL)
5324

    
5325
  def BuildHooksEnv(self):
5326
    """Build hooks env.
5327

5328
    This runs on master, primary and secondary nodes of the instance.
5329

5330
    """
5331
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5332
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5333
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5334
    return env, nl, nl
5335

    
5336
  def CheckPrereq(self):
5337
    """Check prerequisites.
5338

5339
    This checks that the instance is in the cluster and is not running.
5340

5341
    """
5342
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5343
                                                self.op.instance_name)
5344
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5345
    assert instance is not None
5346
    _CheckNodeOnline(self, instance.primary_node)
5347
    _CheckInstanceDown(self, instance, "cannot rename")
5348
    self.instance = instance
5349

    
5350
    new_name = self.op.new_name
5351
    if self.op.name_check:
5352
      hostname = netutils.GetHostname(name=new_name)
5353
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5354
                   hostname.name)
5355
      new_name = self.op.new_name = hostname.name
5356
      if (self.op.ip_check and
5357
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5358
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5359
                                   (hostname.ip, new_name),
5360
                                   errors.ECODE_NOTUNIQUE)
5361

    
5362
    instance_list = self.cfg.GetInstanceList()
5363
    if new_name in instance_list and new_name != instance.name:
5364
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5365
                                 new_name, errors.ECODE_EXISTS)
5366

    
5367
  def Exec(self, feedback_fn):
5368
    """Rename the instance.
5369

5370
    """
5371
    inst = self.instance
5372
    old_name = inst.name
5373

    
5374
    rename_file_storage = False
5375
    if (inst.disk_template == constants.DT_FILE and
5376
        self.op.new_name != inst.name):
5377
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5378
      rename_file_storage = True
5379

    
5380
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5381
    # Change the instance lock. This is definitely safe while we hold the BGL
5382
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5383
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5384

    
5385
    # re-read the instance from the configuration after rename
5386
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5387

    
5388
    if rename_file_storage:
5389
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5390
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5391
                                                     old_file_storage_dir,
5392
                                                     new_file_storage_dir)
5393
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5394
                   " (but the instance has been renamed in Ganeti)" %
5395
                   (inst.primary_node, old_file_storage_dir,
5396
                    new_file_storage_dir))
5397

    
5398
    _StartInstanceDisks(self, inst, None)
5399
    try:
5400
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5401
                                                 old_name, self.op.debug_level)
5402
      msg = result.fail_msg
5403
      if msg:
5404
        msg = ("Could not run OS rename script for instance %s on node %s"
5405
               " (but the instance has been renamed in Ganeti): %s" %
5406
               (inst.name, inst.primary_node, msg))
5407
        self.proc.LogWarning(msg)
5408
    finally:
5409
      _ShutdownInstanceDisks(self, inst)
5410

    
5411
    return inst.name
5412

    
5413

    
5414
class LURemoveInstance(LogicalUnit):
5415
  """Remove an instance.
5416

5417
  """
5418
  HPATH = "instance-remove"
5419
  HTYPE = constants.HTYPE_INSTANCE
5420
  REQ_BGL = False
5421

    
5422
  def ExpandNames(self):
5423
    self._ExpandAndLockInstance()
5424
    self.needed_locks[locking.LEVEL_NODE] = []
5425
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5426

    
5427
  def DeclareLocks(self, level):
5428
    if level == locking.LEVEL_NODE:
5429
      self._LockInstancesNodes()
5430

    
5431
  def BuildHooksEnv(self):
5432
    """Build hooks env.
5433

5434
    This runs on master, primary and secondary nodes of the instance.
5435

5436
    """
5437
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5438
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5439
    nl = [self.cfg.GetMasterNode()]
5440
    nl_post = list(self.instance.all_nodes) + nl
5441
    return env, nl, nl_post
5442

    
5443
  def CheckPrereq(self):
5444
    """Check prerequisites.
5445

5446
    This checks that the instance is in the cluster.
5447

5448
    """
5449
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5450
    assert self.instance is not None, \
5451
      "Cannot retrieve locked instance %s" % self.op.instance_name
5452

    
5453
  def Exec(self, feedback_fn):
5454
    """Remove the instance.
5455

5456
    """
5457
    instance = self.instance
5458
    logging.info("Shutting down instance %s on node %s",
5459
                 instance.name, instance.primary_node)
5460

    
5461
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5462
                                             self.op.shutdown_timeout)
5463
    msg = result.fail_msg
5464
    if msg:
5465
      if self.op.ignore_failures:
5466
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5467
      else:
5468
        raise errors.OpExecError("Could not shutdown instance %s on"
5469
                                 " node %s: %s" %
5470
                                 (instance.name, instance.primary_node, msg))
5471

    
5472
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5473

    
5474

    
5475
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5476
  """Utility function to remove an instance.
5477

5478
  """
5479
  logging.info("Removing block devices for instance %s", instance.name)
5480

    
5481
  if not _RemoveDisks(lu, instance):
5482
    if not ignore_failures:
5483
      raise errors.OpExecError("Can't remove instance's disks")
5484
    feedback_fn("Warning: can't remove instance's disks")
5485

    
5486
  logging.info("Removing instance %s out of cluster config", instance.name)
5487

    
5488
  lu.cfg.RemoveInstance(instance.name)
5489

    
5490
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5491
    "Instance lock removal conflict"
5492

    
5493
  # Remove lock for the instance
5494
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5495

    
5496

    
5497
class LUQueryInstances(NoHooksLU):
5498
  """Logical unit for querying instances.
5499

5500
  """
5501
  # pylint: disable-msg=W0142
5502
  REQ_BGL = False
5503

    
5504
  def CheckArguments(self):
5505
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5506
                             self.op.use_locking)
5507

    
5508
  def ExpandNames(self):
5509
    self.iq.ExpandNames(self)
5510

    
5511
  def DeclareLocks(self, level):
5512
    self.iq.DeclareLocks(self, level)
5513

    
5514
  def Exec(self, feedback_fn):
5515
    return self.iq.OldStyleQuery(self)
5516

    
5517

    
5518
class LUFailoverInstance(LogicalUnit):
5519
  """Failover an instance.
5520

5521
  """
5522
  HPATH = "instance-failover"
5523
  HTYPE = constants.HTYPE_INSTANCE
5524
  REQ_BGL = False
5525

    
5526
  def ExpandNames(self):
5527
    self._ExpandAndLockInstance()
5528
    self.needed_locks[locking.LEVEL_NODE] = []
5529
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5530

    
5531
  def DeclareLocks(self, level):
5532
    if level == locking.LEVEL_NODE:
5533
      self._LockInstancesNodes()
5534

    
5535
  def BuildHooksEnv(self):
5536
    """Build hooks env.
5537

5538
    This runs on master, primary and secondary nodes of the instance.
5539

5540
    """
5541
    instance = self.instance
5542
    source_node = instance.primary_node
5543
    target_node = instance.secondary_nodes[0]
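    # a failover swaps the roles of the two nodes, so the new primary is
    # the current secondary and vice versa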
    env = {
5545
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5546
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5547
      "OLD_PRIMARY": source_node,
5548
      "OLD_SECONDARY": target_node,
5549
      "NEW_PRIMARY": target_node,
5550
      "NEW_SECONDARY": source_node,
5551
      }
5552
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5553
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5554
    nl_post = list(nl)
5555
    nl_post.append(source_node)
5556
    return env, nl, nl_post
5557

    
5558
  def CheckPrereq(self):
5559
    """Check prerequisites.
5560

5561
    This checks that the instance is in the cluster.
5562

5563
    """
5564
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5565
    assert self.instance is not None, \
5566
      "Cannot retrieve locked instance %s" % self.op.instance_name
5567

    
5568
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5569
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5570
      raise errors.OpPrereqError("Instance's disk layout is not"
5571
                                 " network mirrored, cannot failover.",
5572
                                 errors.ECODE_STATE)
5573

    
5574
    secondary_nodes = instance.secondary_nodes
5575
    if not secondary_nodes:
5576
      raise errors.ProgrammerError("no secondary node but using "
5577
                                   "a mirrored disk template")
5578

    
5579
    target_node = secondary_nodes[0]
5580
    _CheckNodeOnline(self, target_node)
5581
    _CheckNodeNotDrained(self, target_node)
5582
    if instance.admin_up:
5583
      # check memory requirements on the secondary node
5584
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5585
                           instance.name, bep[constants.BE_MEMORY],
5586
                           instance.hypervisor)
5587
    else:
5588
      self.LogInfo("Not checking memory on the secondary node as"
5589
                   " instance will not be started")
5590

    
5591
    # check bridge existence
5592
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5593

    
5594
  def Exec(self, feedback_fn):
5595
    """Failover an instance.
5596

5597
    The failover is done by shutting it down on its present node and
5598
    starting it on the secondary.
5599

5600
    """
5601
    instance = self.instance
5602
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5603

    
5604
    source_node = instance.primary_node
5605
    target_node = instance.secondary_nodes[0]
5606

    
5607
    if instance.admin_up:
5608
      feedback_fn("* checking disk consistency between source and target")
5609
      for dev in instance.disks:
5610
        # for drbd, these are drbd over lvm
5611
        if not _CheckDiskConsistency(self, dev, target_node, False):
5612
          if not self.op.ignore_consistency:
5613
            raise errors.OpExecError("Disk %s is degraded on target node,"
5614
                                     " aborting failover." % dev.iv_name)
5615
    else:
5616
      feedback_fn("* not checking disk consistency as instance is not running")
5617

    
5618
    feedback_fn("* shutting down instance on source node")
5619
    logging.info("Shutting down instance %s on node %s",
5620
                 instance.name, source_node)
5621

    
5622
    result = self.rpc.call_instance_shutdown(source_node, instance,
5623
                                             self.op.shutdown_timeout)
5624
    msg = result.fail_msg
5625
    if msg:
5626
      if self.op.ignore_consistency or primary_node.offline:
5627
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5628
                             " Proceeding anyway. Please make sure node"
5629
                             " %s is down. Error details: %s",
5630
                             instance.name, source_node, source_node, msg)
5631
      else:
5632
        raise errors.OpExecError("Could not shutdown instance %s on"
5633
                                 " node %s: %s" %
5634
                                 (instance.name, source_node, msg))
5635

    
5636
    feedback_fn("* deactivating the instance's disks on source node")
5637
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5638
      raise errors.OpExecError("Can't shut down the instance's disks.")
5639

    
5640
    instance.primary_node = target_node
5641
    # distribute new instance config to the other nodes
5642
    self.cfg.Update(instance, feedback_fn)
5643

    
5644
    # Only start the instance if it's marked as up
5645
    if instance.admin_up:
5646
      feedback_fn("* activating the instance's disks on target node")
5647
      logging.info("Starting instance %s on node %s",
5648
                   instance.name, target_node)
5649

    
5650
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5651
                                           ignore_secondaries=True)
5652
      if not disks_ok:
5653
        _ShutdownInstanceDisks(self, instance)
5654
        raise errors.OpExecError("Can't activate the instance's disks")
5655

    
5656
      feedback_fn("* starting the instance on the target node")
5657
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5658
      msg = result.fail_msg
5659
      if msg:
5660
        _ShutdownInstanceDisks(self, instance)
5661
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5662
                                 (instance.name, target_node, msg))
5663

    
5664

    
5665
class LUMigrateInstance(LogicalUnit):
5666
  """Migrate an instance.
5667

5668
  This is migration without shutting down, compared to the failover,
5669
  which is done with shutdown.
5670

5671
  """
5672
  HPATH = "instance-migrate"
5673
  HTYPE = constants.HTYPE_INSTANCE
5674
  REQ_BGL = False
5675

    
5676
  def ExpandNames(self):
5677
    self._ExpandAndLockInstance()
5678

    
5679
    self.needed_locks[locking.LEVEL_NODE] = []
5680
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5681

    
5682
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5683
                                       self.op.cleanup)
5684
    self.tasklets = [self._migrater]
5685

    
5686
  def DeclareLocks(self, level):
5687
    if level == locking.LEVEL_NODE:
5688
      self._LockInstancesNodes()
5689

    
5690
  def BuildHooksEnv(self):
5691
    """Build hooks env.
5692

5693
    This runs on master, primary and secondary nodes of the instance.
5694

5695
    """
5696
    instance = self._migrater.instance
5697
    source_node = instance.primary_node
5698
    target_node = instance.secondary_nodes[0]
5699
    env = _BuildInstanceHookEnvByObject(self, instance)
5700
    env["MIGRATE_LIVE"] = self._migrater.live
5701
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5702
    env.update({
5703
        "OLD_PRIMARY": source_node,
5704
        "OLD_SECONDARY": target_node,
5705
        "NEW_PRIMARY": target_node,
5706
        "NEW_SECONDARY": source_node,
5707
        })
5708
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5709
    nl_post = list(nl)
5710
    nl_post.append(source_node)
5711
    return env, nl, nl_post
5712

    
5713

    
5714
class LUMoveInstance(LogicalUnit):
5715
  """Move an instance by data-copying.
5716

5717
  """
5718
  HPATH = "instance-move"
5719
  HTYPE = constants.HTYPE_INSTANCE
5720
  REQ_BGL = False
5721

    
5722
  def ExpandNames(self):
5723
    self._ExpandAndLockInstance()
5724
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5725
    self.op.target_node = target_node
5726
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5727
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5728

    
5729
  def DeclareLocks(self, level):
5730
    if level == locking.LEVEL_NODE:
5731
      self._LockInstancesNodes(primary_only=True)
5732

    
5733
  def BuildHooksEnv(self):
5734
    """Build hooks env.
5735

5736
    This runs on master, primary and secondary nodes of the instance.
5737

5738
    """
5739
    env = {
5740
      "TARGET_NODE": self.op.target_node,
5741
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5742
      }
5743
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5744
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5745
                                       self.op.target_node]
5746
    return env, nl, nl
5747

    
5748
  def CheckPrereq(self):
5749
    """Check prerequisites.
5750

5751
    This checks that the instance is in the cluster.
5752

5753
    """
5754
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5755
    assert self.instance is not None, \
5756
      "Cannot retrieve locked instance %s" % self.op.instance_name
5757

    
5758
    node = self.cfg.GetNodeInfo(self.op.target_node)
5759
    assert node is not None, \
5760
      "Cannot retrieve locked node %s" % self.op.target_node
5761

    
5762
    self.target_node = target_node = node.name
5763

    
5764
    if target_node == instance.primary_node:
5765
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5766
                                 (instance.name, target_node),
5767
                                 errors.ECODE_STATE)
5768

    
5769
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5770

    
5771
    for idx, dsk in enumerate(instance.disks):
5772
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5773
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5774
                                   " cannot copy" % idx, errors.ECODE_STATE)
5775

    
5776
    _CheckNodeOnline(self, target_node)
5777
    _CheckNodeNotDrained(self, target_node)
5778
    _CheckNodeVmCapable(self, target_node)
5779

    
5780
    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5791

    
5792
  def Exec(self, feedback_fn):
5793
    """Move an instance.
5794

5795
    The move is done by shutting it down on its present node, copying
5796
    the data over (slow) and starting it on the new node.
5797

5798
    """
5799
    instance = self.instance
5800

    
5801
    source_node = instance.primary_node
5802
    target_node = self.target_node
5803

    
5804
    self.LogInfo("Shutting down instance %s on source node %s",
5805
                 instance.name, source_node)
5806

    
5807
    result = self.rpc.call_instance_shutdown(source_node, instance,
5808
                                             self.op.shutdown_timeout)
5809
    msg = result.fail_msg
5810
    if msg:
5811
      if self.op.ignore_consistency:
5812
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5813
                             " Proceeding anyway. Please make sure node"
5814
                             " %s is down. Error details: %s",
5815
                             instance.name, source_node, source_node, msg)
5816
      else:
5817
        raise errors.OpExecError("Could not shutdown instance %s on"
5818
                                 " node %s: %s" %
5819
                                 (instance.name, source_node, msg))
5820

    
5821
    # create the target disks
5822
    try:
5823
      _CreateDisks(self, instance, target_node=target_node)
5824
    except errors.OpExecError:
5825
      self.LogWarning("Device creation failed, reverting...")
5826
      try:
5827
        _RemoveDisks(self, instance, target_node=target_node)
5828
      finally:
5829
        self.cfg.ReleaseDRBDMinors(instance.name)
5830
        raise
5831

    
5832
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5833

    
5834
    errs = []
5835
    # activate, get path, copy the data over
5836
    for idx, disk in enumerate(instance.disks):
5837
      self.LogInfo("Copying data for disk %d", idx)
5838
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5839
                                               instance.name, True)
5840
      if result.fail_msg:
5841
        self.LogWarning("Can't assemble newly created disk %d: %s",
5842
                        idx, result.fail_msg)
5843
        errs.append(result.fail_msg)
5844
        break
5845
      dev_path = result.payload
5846
      result = self.rpc.call_blockdev_export(source_node, disk,
5847
                                             target_node, dev_path,
5848
                                             cluster_name)
5849
      if result.fail_msg:
5850
        self.LogWarning("Can't copy data over for disk %d: %s",
5851
                        idx, result.fail_msg)
5852
        errs.append(result.fail_msg)
5853
        break
5854

    
5855
    if errs:
5856
      self.LogWarning("Some disks failed to copy, aborting")
5857
      try:
5858
        _RemoveDisks(self, instance, target_node=target_node)
5859
      finally:
5860
        self.cfg.ReleaseDRBDMinors(instance.name)
5861
        raise errors.OpExecError("Errors during disk copy: %s" %
5862
                                 (",".join(errs),))
5863

    
5864
    instance.primary_node = target_node
5865
    self.cfg.Update(instance, feedback_fn)
5866

    
5867
    self.LogInfo("Removing the disks on the original node")
5868
    _RemoveDisks(self, instance, target_node=source_node)
5869

    
5870
    # Only start the instance if it's marked as up
5871
    if instance.admin_up:
5872
      self.LogInfo("Starting instance %s on node %s",
5873
                   instance.name, target_node)
5874

    
5875
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5876
                                           ignore_secondaries=True)
5877
      if not disks_ok:
5878
        _ShutdownInstanceDisks(self, instance)
5879
        raise errors.OpExecError("Can't activate the instance's disks")
5880

    
5881
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5882
      msg = result.fail_msg
5883
      if msg:
5884
        _ShutdownInstanceDisks(self, instance)
5885
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5886
                                 (instance.name, target_node, msg))
5887

    
5888

    
5889
class LUMigrateNode(LogicalUnit):
5890
  """Migrate all instances from a node.
5891

5892
  """
5893
  HPATH = "node-migrate"
5894
  HTYPE = constants.HTYPE_NODE
5895
  REQ_BGL = False
5896

    
5897
  def ExpandNames(self):
5898
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5899

    
5900
    self.needed_locks = {
5901
      locking.LEVEL_NODE: [self.op.node_name],
5902
      }
5903

    
5904
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5905

    
5906
    # Create tasklets for migrating instances for all instances on this node
5907
    names = []
5908
    tasklets = []
5909

    
5910
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5911
      logging.debug("Migrating instance %s", inst.name)
5912
      names.append(inst.name)
5913

    
5914
      tasklets.append(TLMigrateInstance(self, inst.name, False))
5915

    
5916
    self.tasklets = tasklets
5917

    
5918
    # Declare instance locks
5919
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5920

    
5921
  def DeclareLocks(self, level):
5922
    if level == locking.LEVEL_NODE:
5923
      self._LockInstancesNodes()
5924

    
5925
  def BuildHooksEnv(self):
5926
    """Build hooks env.
5927

5928
    This runs on the master, the primary and all the secondaries.
5929

5930
    """
5931
    env = {
5932
      "NODE_NAME": self.op.node_name,
5933
      }
5934

    
5935
    nl = [self.cfg.GetMasterNode()]
5936

    
5937
    return (env, nl, nl)
5938

    
5939

    
5940
class TLMigrateInstance(Tasklet):
5941
  """Tasklet class for instance migration.
5942

5943
  @type live: boolean
5944
  @ivar live: whether the migration will be done live or non-live;
5945
      this variable is initialized only after CheckPrereq has run
5946

5947
  """
5948
  def __init__(self, lu, instance_name, cleanup):
5949
    """Initializes this class.
5950

5951
    """
5952
    Tasklet.__init__(self, lu)
5953

    
5954
    # Parameters
5955
    self.instance_name = instance_name
5956
    self.cleanup = cleanup
5957
    self.live = False # will be overridden later
5958

    
5959
  def CheckPrereq(self):
5960
    """Check prerequisites.
5961

5962
    This checks that the instance is in the cluster.
5963

5964
    """
5965
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5966
    instance = self.cfg.GetInstanceInfo(instance_name)
5967
    assert instance is not None
5968

    
5969
    if instance.disk_template != constants.DT_DRBD8:
5970
      raise errors.OpPrereqError("Instance's disk layout is not"
5971
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5972

    
5973
    secondary_nodes = instance.secondary_nodes
5974
    if not secondary_nodes:
5975
      raise errors.ConfigurationError("No secondary node but using"
5976
                                      " drbd8 disk template")
5977

    
5978
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5979

    
5980
    target_node = secondary_nodes[0]
5981
    # check memory requirements on the secondary node
5982
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5983
                         instance.name, i_be[constants.BE_MEMORY],
5984
                         instance.hypervisor)
5985

    
5986
    # check bridge existence
5987
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5988

    
5989
    if not self.cleanup:
5990
      _CheckNodeNotDrained(self.lu, target_node)
5991
      result = self.rpc.call_instance_migratable(instance.primary_node,
5992
                                                 instance)
5993
      result.Raise("Can't migrate, please use failover",
5994
                   prereq=True, ecode=errors.ECODE_STATE)
5995

    
5996
    self.instance = instance
5997

    
5998
    if self.lu.op.live is not None and self.lu.op.mode is not None:
5999
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6000
                                 " parameters are accepted",
6001
                                 errors.ECODE_INVAL)
6002
    if self.lu.op.live is not None:
6003
      if self.lu.op.live:
6004
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6005
      else:
6006
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6007
      # reset the 'live' parameter to None so that repeated
6008
      # invocations of CheckPrereq do not raise an exception
6009
      self.lu.op.live = None
6010
    elif self.lu.op.mode is None:
6011
      # read the default value from the hypervisor
6012
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6013
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6014

    
6015
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6016

    
6017
  def _WaitUntilSync(self):
6018
    """Poll with custom rpc for disk sync.
6019

6020
    This uses our own step-based rpc call.
6021

6022
    """
6023
    self.feedback_fn("* wait until resync is done")
6024
    all_done = False
6025
    while not all_done:
6026
      all_done = True
6027
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6028
                                            self.nodes_ip,
6029
                                            self.instance.disks)
6030
      min_percent = 100
6031
      for node, nres in result.items():
6032
        nres.Raise("Cannot resync disks on node %s" % node)
6033
        node_done, node_percent = nres.payload
6034
        all_done = all_done and node_done
6035
        if node_percent is not None:
6036
          min_percent = min(min_percent, node_percent)
6037
      if not all_done:
6038
        if min_percent < 100:
6039
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6040
        time.sleep(2)
6041

    
6042
  def _EnsureSecondary(self, node):
6043
    """Demote a node to secondary.
6044

6045
    """
6046
    self.feedback_fn("* switching node %s to secondary mode" % node)
6047

    
6048
    for dev in self.instance.disks:
6049
      self.cfg.SetDiskID(dev, node)
6050

    
6051
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6052
                                          self.instance.disks)
6053
    result.Raise("Cannot change disk to secondary on node %s" % node)
6054

    
6055
  def _GoStandalone(self):
6056
    """Disconnect from the network.
6057

6058
    """
6059
    self.feedback_fn("* changing into standalone mode")
6060
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6061
                                               self.instance.disks)
6062
    for node, nres in result.items():
6063
      nres.Raise("Cannot disconnect disks node %s" % node)
6064

    
6065
  def _GoReconnect(self, multimaster):
6066
    """Reconnect to the network.
6067

6068
    """
6069
    if multimaster:
6070
      msg = "dual-master"
6071
    else:
6072
      msg = "single-master"
6073
    self.feedback_fn("* changing disks into %s mode" % msg)
6074
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6075
                                           self.instance.disks,
6076
                                           self.instance.name, multimaster)
6077
    for node, nres in result.items():
6078
      nres.Raise("Cannot change disks config on node %s" % node)
6079

    
6080
  def _ExecCleanup(self):
6081
    """Try to cleanup after a failed migration.
6082

6083
    The cleanup is done by:
6084
      - check that the instance is running only on one node
6085
        (and update the config if needed)
6086
      - change disks on its secondary node to secondary
6087
      - wait until disks are fully synchronized
6088
      - disconnect from the network
6089
      - change disks into single-master mode
6090
      - wait again until disks are fully synchronized
6091

6092
    """
6093
    instance = self.instance
6094
    target_node = self.target_node
6095
    source_node = self.source_node
6096

    
6097
    # check running on only one node
6098
    self.feedback_fn("* checking where the instance actually runs"
6099
                     " (if this hangs, the hypervisor might be in"
6100
                     " a bad state)")
6101
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6102
    for node, result in ins_l.items():
6103
      result.Raise("Can't contact node %s" % node)
6104

    
6105
    runningon_source = instance.name in ins_l[source_node].payload
6106
    runningon_target = instance.name in ins_l[target_node].payload
6107

    
6108
    if runningon_source and runningon_target:
6109
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6110
                               " or the hypervisor is confused. You will have"
6111
                               " to ensure manually that it runs only on one"
6112
                               " and restart this operation.")
6113

    
6114
    if not (runningon_source or runningon_target):
6115
      raise errors.OpExecError("Instance does not seem to be running at all."
6116
                               " In this case, it's safer to repair by"
6117
                               " running 'gnt-instance stop' to ensure disk"
6118
                               " shutdown, and then restarting it.")
6119

    
6120
    if runningon_target:
6121
      # the migration has actually succeeded, we need to update the config
6122
      self.feedback_fn("* instance running on secondary node (%s),"
6123
                       " updating config" % target_node)
6124
      instance.primary_node = target_node
6125
      self.cfg.Update(instance, self.feedback_fn)
6126
      demoted_node = source_node
6127
    else:
6128
      self.feedback_fn("* instance confirmed to be running on its"
6129
                       " primary node (%s)" % source_node)
6130
      demoted_node = target_node
6131

    
6132
    self._EnsureSecondary(demoted_node)
6133
    try:
6134
      self._WaitUntilSync()
6135
    except errors.OpExecError:
6136
      # we ignore errors here, since if the device is standalone, it
6137
      # won't be able to sync
6138
      pass
6139
    self._GoStandalone()
6140
    self._GoReconnect(False)
6141
    self._WaitUntilSync()
6142

    
6143
    self.feedback_fn("* done")
6144

    
6145
  def _RevertDiskStatus(self):
6146
    """Try to revert the disk status after a failed migration.
6147

6148
    """
6149
    target_node = self.target_node
6150
    try:
6151
      self._EnsureSecondary(target_node)
6152
      self._GoStandalone()
6153
      self._GoReconnect(False)
6154
      self._WaitUntilSync()
6155
    except errors.OpExecError, err:
6156
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6157
                         " drives: error '%s'\n"
6158
                         "Please look and recover the instance status" %
6159
                         str(err))
6160

    
6161
  def _AbortMigration(self):
6162
    """Call the hypervisor code to abort a started migration.
6163

6164
    """
6165
    instance = self.instance
6166
    target_node = self.target_node
6167
    migration_info = self.migration_info
6168

    
6169
    abort_result = self.rpc.call_finalize_migration(target_node,
6170
                                                    instance,
6171
                                                    migration_info,
6172
                                                    False)
6173
    abort_msg = abort_result.fail_msg
6174
    if abort_msg:
6175
      logging.error("Aborting migration failed on target node %s: %s",
6176
                    target_node, abort_msg)
6177
      # Don't raise an exception here, as we still have to try to revert the
6178
      # disk status, even if this step failed.
6179

    
6180
  def _ExecMigration(self):
6181
    """Migrate an instance.
6182

6183
    The migrate is done by:
6184
      - change the disks into dual-master mode
6185
      - wait until disks are fully synchronized again
6186
      - migrate the instance
6187
      - change disks on the new secondary node (the old primary) to secondary
6188
      - wait until disks are fully synchronized
6189
      - change disks into single-master mode
6190

6191
    """
6192
    instance = self.instance
6193
    target_node = self.target_node
6194
    source_node = self.source_node
6195

    
6196
    self.feedback_fn("* checking disk consistency between source and target")
6197
    for dev in instance.disks:
6198
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6199
        raise errors.OpExecError("Disk %s is degraded or not fully"
6200
                                 " synchronized on target node,"
6201
                                 " aborting migrate." % dev.iv_name)
6202

    
6203
    # First get the migration information from the remote node
6204
    result = self.rpc.call_migration_info(source_node, instance)
6205
    msg = result.fail_msg
6206
    if msg:
6207
      log_err = ("Failed fetching source migration information from %s: %s" %
6208
                 (source_node, msg))
6209
      logging.error(log_err)
6210
      raise errors.OpExecError(log_err)
6211

    
6212
    self.migration_info = migration_info = result.payload
6213

    
6214
    # Then switch the disks to master/master mode
6215
    self._EnsureSecondary(target_node)
6216
    self._GoStandalone()
6217
    self._GoReconnect(True)
6218
    self._WaitUntilSync()
6219

    
6220
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6221
    result = self.rpc.call_accept_instance(target_node,
6222
                                           instance,
6223
                                           migration_info,
6224
                                           self.nodes_ip[target_node])
6225

    
6226
    msg = result.fail_msg
6227
    if msg:
6228
      logging.error("Instance pre-migration failed, trying to revert"
6229
                    " disk status: %s", msg)
6230
      self.feedback_fn("Pre-migration failed, aborting")
6231
      self._AbortMigration()
6232
      self._RevertDiskStatus()
6233
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6234
                               (instance.name, msg))
6235

    
6236
    self.feedback_fn("* migrating instance to %s" % target_node)
6237
    time.sleep(10)
6238
    result = self.rpc.call_instance_migrate(source_node, instance,
6239
                                            self.nodes_ip[target_node],
6240
                                            self.live)
6241
    msg = result.fail_msg
6242
    if msg:
6243
      logging.error("Instance migration failed, trying to revert"
6244
                    " disk status: %s", msg)
6245
      self.feedback_fn("Migration failed, aborting")
6246
      self._AbortMigration()
6247
      self._RevertDiskStatus()
6248
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6249
                               (instance.name, msg))
6250
    time.sleep(10)
6251

    
6252
    instance.primary_node = target_node
6253
    # distribute new instance config to the other nodes
6254
    self.cfg.Update(instance, self.feedback_fn)
6255

    
6256
    result = self.rpc.call_finalize_migration(target_node,
6257
                                              instance,
6258
                                              migration_info,
6259
                                              True)
6260
    msg = result.fail_msg
6261
    if msg:
6262
      logging.error("Instance migration succeeded, but finalization failed:"
6263
                    " %s", msg)
6264
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6265
                               msg)
6266

    
6267
    self._EnsureSecondary(source_node)
6268
    self._WaitUntilSync()
6269
    self._GoStandalone()
6270
    self._GoReconnect(False)
6271
    self._WaitUntilSync()
6272

    
6273
    self.feedback_fn("* done")
6274

    
6275
  def Exec(self, feedback_fn):
6276
    """Perform the migration.
6277

6278
    """
6279
    feedback_fn("Migrating instance %s" % self.instance.name)
6280

    
6281
    self.feedback_fn = feedback_fn
6282

    
6283
    self.source_node = self.instance.primary_node
6284
    self.target_node = self.instance.secondary_nodes[0]
6285
    self.all_nodes = [self.source_node, self.target_node]
6286
    self.nodes_ip = {
6287
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6288
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6289
      }
6290

    
6291
    if self.cleanup:
6292
      return self._ExecCleanup()
6293
    else:
6294
      return self._ExecMigration()
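# A minimal, self-contained sketch (not used elsewhere in this module) of
# the precedence that TLMigrateInstance.CheckPrereq above applies when
# combining the legacy 'live' flag, the 'mode' parameter and the hypervisor
# default; 'hv_default' stands in for the HV_MIGRATION_MODE value.
def _ExampleResolveMigrationMode(live, mode, hv_default):
  """Return the effective migration mode for the given parameters.

  """
  if live is not None and mode is not None:
    raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                               " parameters are accepted",
                               errors.ECODE_INVAL)
  if live is not None:
    if live:
      return constants.HT_MIGRATION_LIVE
    return constants.HT_MIGRATION_NONLIVE
  if mode is None:
    return hv_default
  return mode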
def _CreateBlockDev(lu, node, instance, device, force_create,
6298
                    info, force_open):
6299
  """Create a tree of block devices on a given node.
6300

6301
  If this device type has to be created on secondaries, create it and
6302
  all its children.
6303

6304
  If not, just recurse to children keeping the same 'force' value.
6305

6306
  @param lu: the lu on whose behalf we execute
6307
  @param node: the node on which to create the device
6308
  @type instance: L{objects.Instance}
6309
  @param instance: the instance which owns the device
6310
  @type device: L{objects.Disk}
6311
  @param device: the device to create
6312
  @type force_create: boolean
6313
  @param force_create: whether to force creation of this device; this
6314
      will be changed to True whenever we find a device which has
6315
      CreateOnSecondary() attribute
6316
  @param info: the extra 'metadata' we should attach to the device
6317
      (this will be represented as a LVM tag)
6318
  @type force_open: boolean
6319
  @param force_open: this parameter will be passed to the
6320
      L{backend.BlockdevCreate} function where it specifies
6321
      whether we run on primary or not, and it affects both
6322
      the child assembly and the device own Open() execution
6323

6324
  """
6325
  if device.CreateOnSecondary():
6326
    force_create = True
6327

    
6328
  if device.children:
6329
    for child in device.children:
6330
      _CreateBlockDev(lu, node, instance, child, force_create,
6331
                      info, force_open)
6332

    
6333
  if not force_create:
6334
    return
6335

    
6336
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6337

    
6338

    
6339
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6340
  """Create a single block device on a given node.
6341

6342
  This will not recurse over children of the device, so they must be
6343
  created in advance.
6344

6345
  @param lu: the lu on whose behalf we execute
6346
  @param node: the node on which to create the device
6347
  @type instance: L{objects.Instance}
6348
  @param instance: the instance which owns the device
6349
  @type device: L{objects.Disk}
6350
  @param device: the device to create
6351
  @param info: the extra 'metadata' we should attach to the device
6352
      (this will be represented as a LVM tag)
6353
  @type force_open: boolean
6354
  @param force_open: this parameter will be passed to the
6355
      L{backend.BlockdevCreate} function where it specifies
6356
      whether we run on primary or not, and it affects both
6357
      the child assembly and the device own Open() execution
6358

6359
  """
6360
  lu.cfg.SetDiskID(device, node)
6361
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6362
                                       instance.name, force_open, info)
6363
  result.Raise("Can't create block device %s on"
6364
               " node %s for instance %s" % (device, node, instance.name))
6365
  if device.physical_id is None:
6366
    device.physical_id = result.payload
6367

    
6368

    
6369
def _GenerateUniqueNames(lu, exts):
6370
  """Generate a suitable LV name.
6371

6372
  This will generate a logical volume name for the given instance.
6373

6374
  """
6375
  results = []
6376
  for val in exts:
6377
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6378
    results.append("%s%s" % (new_id, val))
6379
  return results
6380

    
6381

    
6382
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6383
                         p_minor, s_minor):
6384
  """Generate a drbd8 device complete with its children.
6385

6386
  """
6387
  port = lu.cfg.AllocatePort()
6388
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6389
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6390
                          logical_id=(vgname, names[0]))
6391
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6392
                          logical_id=(vgname, names[1]))
6393
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6394
                          logical_id=(primary, secondary, port,
6395
                                      p_minor, s_minor,
6396
                                      shared_secret),
6397
                          children=[dev_data, dev_meta],
6398
                          iv_name=iv_name)
6399
  return drbd_dev
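# A minimal, self-contained sketch (not used elsewhere in this module) of
# the device tree built by _GenerateDRBD8Branch above; the VG name, node
# names, port, minors and secret are made-up stand-ins for values that are
# normally allocated from the cluster configuration.
def _ExampleDRBD8Tree():
  """Build a stand-alone DRBD8 disk object with its two LV children.

  """
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=1024,
                          logical_id=("xenvg", "example_data"))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=("xenvg", "example_meta"))
  return objects.Disk(dev_type=constants.LD_DRBD8, size=1024,
                      logical_id=("node1.example.com", "node2.example.com",
                                  11000, 0, 1, "example-secret"),
                      children=[dev_data, dev_meta],
                      iv_name="disk/0")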
def _GenerateDiskTemplate(lu, template_name,
6403
                          instance_name, primary_node,
6404
                          secondary_nodes, disk_info,
6405
                          file_storage_dir, file_driver,
6406
                          base_index, feedback_fn):
6407
  """Generate the entire disk layout for a given template type.
6408

6409
  """
6410
  #TODO: compute space requirements
6411

    
6412
  vgname = lu.cfg.GetVGName()
6413
  disk_count = len(disk_info)
6414
  disks = []
6415
  if template_name == constants.DT_DISKLESS:
6416
    pass
6417
  elif template_name == constants.DT_PLAIN:
6418
    if len(secondary_nodes) != 0:
6419
      raise errors.ProgrammerError("Wrong template configuration")
6420

    
6421
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6422
                                      for i in range(disk_count)])
6423
    for idx, disk in enumerate(disk_info):
6424
      disk_index = idx + base_index
6425
      vg = disk.get("vg", vgname)
6426
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6427
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6428
                              logical_id=(vg, names[idx]),
6429
                              iv_name="disk/%d" % disk_index,
6430
                              mode=disk["mode"])
6431
      disks.append(disk_dev)
6432
  elif template_name == constants.DT_DRBD8:
6433
    if len(secondary_nodes) != 1:
6434
      raise errors.ProgrammerError("Wrong template configuration")
6435
    remote_node = secondary_nodes[0]
6436
    minors = lu.cfg.AllocateDRBDMinor(
6437
      [primary_node, remote_node] * len(disk_info), instance_name)
6438

    
6439
    names = []
6440
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6441
                                               for i in range(disk_count)]):
6442
      names.append(lv_prefix + "_data")
6443
      names.append(lv_prefix + "_meta")
6444
    for idx, disk in enumerate(disk_info):
6445
      disk_index = idx + base_index
6446
      vg = disk.get("vg", vgname)
6447
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6448
                                      disk["size"], vg, names[idx*2:idx*2+2],
6449
                                      "disk/%d" % disk_index,
6450
                                      minors[idx*2], minors[idx*2+1])
6451
      disk_dev.mode = disk["mode"]
6452
      disks.append(disk_dev)
6453
  elif template_name == constants.DT_FILE:
6454
    if len(secondary_nodes) != 0:
6455
      raise errors.ProgrammerError("Wrong template configuration")
6456

    
6457
    opcodes.RequireFileStorage()
6458

    
6459
    for idx, disk in enumerate(disk_info):
6460
      disk_index = idx + base_index
6461
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6462
                              iv_name="disk/%d" % disk_index,
6463
                              logical_id=(file_driver,
6464
                                          "%s/disk%d" % (file_storage_dir,
6465
                                                         disk_index)),
6466
                              mode=disk["mode"])
6467
      disks.append(disk_dev)
6468
  else:
6469
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6470
  return disks
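# A minimal, self-contained sketch (not used elsewhere in this module) of
# the LV naming scheme used by the DT_DRBD8 branch of _GenerateDiskTemplate
# above: every unique prefix yields a data/meta pair, and the pair for disk
# 'idx' ends up at names[idx * 2:idx * 2 + 2].
def _ExampleDrbdLvNames(lv_prefixes):
  """Return the _data/_meta LV names for the given unique prefixes.

  """
  names = []
  for lv_prefix in lv_prefixes:
    names.append(lv_prefix + "_data")
    names.append(lv_prefix + "_meta")
  return names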
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
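# A minimal, self-contained sketch (not used elsewhere in this module)
# showing the linear extrapolation performed by _CalcEta above; the disk
# size and timing figures are made-up example values.
def _ExampleCalcEtaUsage():
  """Illustrate how _CalcEta extrapolates the remaining wipe time.

  """
  # 256 MiB written out of 1024 MiB in 30 seconds gives 0.1171875 s/MiB,
  # so the remaining 768 MiB should take about 90 more seconds
  eta = _CalcEta(30.0, 256, 1024)
  assert abs(eta - 90.0) < 1e-6
  return eta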
def _WipeDisks(lu, instance):
6494
  """Wipes instance disks.
6495

6496
  @type lu: L{LogicalUnit}
6497
  @param lu: the logical unit on whose behalf we execute
6498
  @type instance: L{objects.Instance}
6499
  @param instance: the instance whose disks we should create
6500
  @return: the success of the wipe
6501

6502
  """
6503
  node = instance.primary_node
6504
  for idx, device in enumerate(instance.disks):
6505
    lu.LogInfo("* Wiping disk %d", idx)
6506
    logging.info("Wiping disk %d for instance %s", idx, instance.name)
6507

    
6508
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6509
    # MAX_WIPE_CHUNK at max
6510
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6511
                          constants.MIN_WIPE_CHUNK_PERCENT)
6512

    
6513
    offset = 0
6514
    size = device.size
6515
    last_output = 0
6516
    start_time = time.time()
6517

    
6518
    while offset < size:
6519
      wipe_size = min(wipe_chunk_size, size - offset)
6520
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6521
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
6522
                   (idx, offset, wipe_size))
6523
      now = time.time()
6524
      offset += wipe_size
6525
      if now - last_output >= 60:
6526
        eta = _CalcEta(now - start_time, offset, size)
6527
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
6528
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
6529
        last_output = now
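# A minimal, self-contained sketch (not used elsewhere in this module) of
# the chunking scheme used by _WipeDisks above: the chunk size is
# MIN_WIPE_CHUNK_PERCENT of the disk, clamped to MAX_WIPE_CHUNK; the
# 10240 MiB default below is only an example figure.
def _ExampleWipeChunks(disk_size=10240):
  """Return the (offset, size) pairs a wipe of C{disk_size} MiB would use.

  """
  chunk_size = min(constants.MAX_WIPE_CHUNK,
                   disk_size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT)
  chunks = []
  offset = 0
  while offset < disk_size:
    wipe_size = min(chunk_size, disk_size - offset)
    chunks.append((offset, wipe_size))
    offset += wipe_size
  return chunks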
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6533
  """Create all disks for an instance.
6534

6535
  This abstracts away some work from AddInstance.
6536

6537
  @type lu: L{LogicalUnit}
6538
  @param lu: the logical unit on whose behalf we execute
6539
  @type instance: L{objects.Instance}
6540
  @param instance: the instance whose disks we should create
6541
  @type to_skip: list
6542
  @param to_skip: list of indices to skip
6543
  @type target_node: string
6544
  @param target_node: if passed, overrides the target node for creation
6545
  @rtype: boolean
6546
  @return: the success of the creation
6547

6548
  """
6549
  info = _GetInstanceInfoText(instance)
6550
  if target_node is None:
6551
    pnode = instance.primary_node
6552
    all_nodes = instance.all_nodes
6553
  else:
6554
    pnode = target_node
6555
    all_nodes = [pnode]
6556

    
6557
  if instance.disk_template == constants.DT_FILE:
6558
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6559
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6560

    
6561
    result.Raise("Failed to create directory '%s' on"
6562
                 " node %s" % (file_storage_dir, pnode))
6563

    
6564
  # Note: this needs to be kept in sync with adding of disks in
6565
  # LUSetInstanceParams
6566
  for idx, device in enumerate(instance.disks):
6567
    if to_skip and idx in to_skip:
6568
      continue
6569
    logging.info("Creating volume %s for instance %s",
6570
                 device.iv_name, instance.name)
6571
    #HARDCODE
6572
    for node in all_nodes:
6573
      f_create = node == pnode
6574
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6575

    
6576

    
6577
def _RemoveDisks(lu, instance, target_node=None):
6578
  """Remove all disks for an instance.
6579

6580
  This abstracts away some work from `AddInstance()` and
6581
  `RemoveInstance()`. Note that in case some of the devices couldn't
6582
  be removed, the removal will continue with the other ones (compare
6583
  with `_CreateDisks()`).
6584

6585
  @type lu: L{LogicalUnit}
6586
  @param lu: the logical unit on whose behalf we execute
6587
  @type instance: L{objects.Instance}
6588
  @param instance: the instance whose disks we should remove
6589
  @type target_node: string
6590
  @param target_node: used to override the node on which to remove the disks
6591
  @rtype: boolean
6592
  @return: the success of the removal
6593

6594
  """
6595
  logging.info("Removing block devices for instance %s", instance.name)
6596

    
6597
  all_result = True
6598
  for device in instance.disks:
6599
    if target_node:
6600
      edata = [(target_node, device)]
6601
    else:
6602
      edata = device.ComputeNodeTree(instance.primary_node)
6603
    for node, disk in edata:
6604
      lu.cfg.SetDiskID(disk, node)
6605
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6606
      if msg:
6607
        lu.LogWarning("Could not remove block device %s on node %s,"
6608
                      " continuing anyway: %s", device.iv_name, node, msg)
6609
        all_result = False
6610

    
6611
  if instance.disk_template == constants.DT_FILE:
6612
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6613
    if target_node:
6614
      tgt = target_node
6615
    else:
6616
      tgt = instance.primary_node
6617
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6618
    if result.fail_msg:
6619
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6620
                    file_storage_dir, instance.primary_node, result.fail_msg)
6621
      all_result = False
6622

    
6623
  return all_result
6624

    
6625

    
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm

    """
    vgs = {}
    for disk in disks:
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
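# A minimal, self-contained sketch (not used elsewhere in this module) of
# the per-VG accounting done by _ComputeDiskSizePerVG above; the VG names
# and disk sizes are made-up example values.
def _ExampleComputeDiskSizePerVG():
  """Illustrate the per-VG size requirements for a DRBD8 layout.

  """
  disks = [{"vg": "xenvg", "size": 1024},
           {"vg": "xenvg", "size": 2048},
           {"vg": "ssdvg", "size": 512}]
  req = _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
  # xenvg: (1024 + 128) + (2048 + 128) = 3328, ssdvg: 512 + 128 = 640
  assert req == {"xenvg": 3328, "ssdvg": 640}
  return req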
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
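# A companion sketch to the one above (again not used elsewhere in this
# module): the flat totals returned by _ComputeDiskSize when per-VG
# placement is ignored; the disk sizes are made-up example values.
def _ExampleComputeDiskSize():
  """Illustrate the aggregate size requirements per disk template.

  """
  disks = [{"size": 1024}, {"size": 2048}, {"size": 512}]
  assert _ComputeDiskSize(constants.DT_PLAIN, disks) == 3584
  # each DRBD8 disk needs an extra 128 MB for its metadata volume
  assert _ComputeDiskSize(constants.DT_DRBD8, disks) == 3584 + 3 * 128
  assert _ComputeDiskSize(constants.DT_DISKLESS, disks) is None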
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6677
  """Hypervisor parameter validation.
6678

6679
  This function abstract the hypervisor parameter validation to be
6680
  used in both instance create and instance modify.
6681

6682
  @type lu: L{LogicalUnit}
6683
  @param lu: the logical unit for which we check
6684
  @type nodenames: list
6685
  @param nodenames: the list of nodes on which we should check
6686
  @type hvname: string
6687
  @param hvname: the name of the hypervisor we should use
6688
  @type hvparams: dict
6689
  @param hvparams: the parameters which we need to check
6690
  @raise errors.OpPrereqError: if the parameters are not valid
6691

6692
  """
6693
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6694
                                                  hvname,
6695
                                                  hvparams)
6696
  for node in nodenames:
6697
    info = hvinfo[node]
6698
    if info.offline:
6699
      continue
6700
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6701

    
6702

    
6703
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6704
  """OS parameters validation.
6705

6706
  @type lu: L{LogicalUnit}
6707
  @param lu: the logical unit for which we check
6708
  @type required: boolean
6709
  @param required: whether the validation should fail if the OS is not
6710
      found
6711
  @type nodenames: list
6712
  @param nodenames: the list of nodes on which we should check
6713
  @type osname: string
6714
  @param osname: the name of the OS we should use
6715
  @type osparams: dict
6716
  @param osparams: the parameters which we need to check
6717
  @raise errors.OpPrereqError: if the parameters are not valid
6718

6719
  """
6720
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6721
                                   [constants.OS_VALIDATE_PARAMETERS],
6722
                                   osparams)
6723
  for node, nres in result.items():
6724
    # we don't check for offline cases since this should be run only
6725
    # against the master node and/or an instance's nodes
6726
    nres.Raise("OS Parameters validation failed on node %s" % node)
6727
    if not nres.payload:
6728
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6729
                 osname, node)
6730

    
6731

    
6732
class LUCreateInstance(LogicalUnit):
6733
  """Create an instance.
6734

6735
  """
6736
  HPATH = "instance-add"
6737
  HTYPE = constants.HTYPE_INSTANCE
6738
  REQ_BGL = False
6739

    
6740
  def CheckArguments(self):
6741
    """Check arguments.
6742

6743
    """
6744
    # do not require name_check to ease forward/backward compatibility
6745
    # for tools
6746
    if self.op.no_install and self.op.start:
6747
      self.LogInfo("No-installation mode selected, disabling startup")
6748
      self.op.start = False
6749
    # validate/normalize the instance name
6750
    self.op.instance_name = \
6751
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6752

    
6753
    if self.op.ip_check and not self.op.name_check:
6754
      # TODO: make the ip check more flexible and not depend on the name check
6755
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6756
                                 errors.ECODE_INVAL)
6757

    
6758
    # check nics' parameter names
6759
    for nic in self.op.nics:
6760
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6761

    
6762
    # check disks. parameter names and consistent adopt/no-adopt strategy
6763
    has_adopt = has_no_adopt = False
6764
    for disk in self.op.disks:
6765
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6766
      if "adopt" in disk:
6767
        has_adopt = True
6768
      else:
6769
        has_no_adopt = True
6770
    if has_adopt and has_no_adopt:
6771
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6772
                                 errors.ECODE_INVAL)
6773
    if has_adopt:
6774
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6775
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6776
                                   " '%s' disk template" %
6777
                                   self.op.disk_template,
6778
                                   errors.ECODE_INVAL)
6779
      if self.op.iallocator is not None:
6780
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6781
                                   " iallocator script", errors.ECODE_INVAL)
6782
      if self.op.mode == constants.INSTANCE_IMPORT:
6783
        raise errors.OpPrereqError("Disk adoption not allowed for"
6784
                                   " instance import", errors.ECODE_INVAL)
6785

    
6786
    self.adopt_disks = has_adopt
6787

    
6788
    # instance name verification
6789
    if self.op.name_check:
6790
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6791
      self.op.instance_name = self.hostname1.name
6792
      # used in CheckPrereq for ip ping check
6793
      self.check_ip = self.hostname1.ip
6794
    else:
6795
      self.check_ip = None
6796

    
6797
    # file storage checks
6798
    if (self.op.file_driver and
6799
        not self.op.file_driver in constants.FILE_DRIVER):
6800
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6801
                                 self.op.file_driver, errors.ECODE_INVAL)
6802

    
6803
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6804
      raise errors.OpPrereqError("File storage directory path not absolute",
6805
                                 errors.ECODE_INVAL)
6806

    
6807
    ### Node/iallocator related checks
6808
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6809

    
6810
    if self.op.pnode is not None:
6811
      if self.op.disk_template in constants.DTS_NET_MIRROR:
6812
        if self.op.snode is None:
6813
          raise errors.OpPrereqError("The networked disk templates need"
6814
                                     " a mirror node", errors.ECODE_INVAL)
6815
      elif self.op.snode:
6816
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6817
                        " template")
6818
        self.op.snode = None
6819

    
6820
    self._cds = _GetClusterDomainSecret()
6821

    
6822
    if self.op.mode == constants.INSTANCE_IMPORT:
6823
      # On import force_variant must be True, because if we forced it at
6824
      # initial install, our only chance when importing it back is that it
6825
      # works again!
6826
      self.op.force_variant = True
6827

    
6828
      if self.op.no_install:
6829
        self.LogInfo("No-installation mode has no effect during import")
6830

    
6831
    elif self.op.mode == constants.INSTANCE_CREATE:
6832
      if self.op.os_type is None:
6833
        raise errors.OpPrereqError("No guest OS specified",
6834
                                   errors.ECODE_INVAL)
6835
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6836
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6837
                                   " installation" % self.op.os_type,
6838
                                   errors.ECODE_STATE)
6839
      if self.op.disk_template is None:
6840
        raise errors.OpPrereqError("No disk template specified",
6841
                                   errors.ECODE_INVAL)
6842

    
6843
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6844
      # Check handshake to ensure both clusters have the same domain secret
6845
      src_handshake = self.op.source_handshake
6846
      if not src_handshake:
6847
        raise errors.OpPrereqError("Missing source handshake",
6848
                                   errors.ECODE_INVAL)
6849

    
6850
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6851
                                                           src_handshake)
6852
      if errmsg:
6853
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6854
                                   errors.ECODE_INVAL)
6855

    
6856
      # Load and check source CA
6857
      self.source_x509_ca_pem = self.op.source_x509_ca
6858
      if not self.source_x509_ca_pem:
6859
        raise errors.OpPrereqError("Missing source X509 CA",
6860
                                   errors.ECODE_INVAL)
6861

    
6862
      try:
6863
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6864
                                                    self._cds)
6865
      except OpenSSL.crypto.Error, err:
6866
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6867
                                   (err, ), errors.ECODE_INVAL)
6868

    
6869
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6870
      if errcode is not None:
6871
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6872
                                   errors.ECODE_INVAL)
6873

    
6874
      self.source_x509_ca = cert
6875

    
6876
      src_instance_name = self.op.source_instance_name
6877
      if not src_instance_name:
6878
        raise errors.OpPrereqError("Missing source instance name",
6879
                                   errors.ECODE_INVAL)
6880

    
6881
      self.source_instance_name = \
6882
          netutils.GetHostname(name=src_instance_name).name
6883

    
6884
    else:
6885
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6886
                                 self.op.mode, errors.ECODE_INVAL)
6887

    
6888
  def ExpandNames(self):
6889
    """ExpandNames for CreateInstance.
6890

6891
    Figure out the right locks for instance creation.
6892

6893
    """
6894
    self.needed_locks = {}
6895

    
6896
    instance_name = self.op.instance_name
6897
    # this is just a preventive check, but someone might still add this
6898
    # instance in the meantime, and creation will fail at lock-add time
6899
    if instance_name in self.cfg.GetInstanceList():
6900
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6901
                                 instance_name, errors.ECODE_EXISTS)
6902

    
6903
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6904

    
6905
    if self.op.iallocator:
6906
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6907
    else:
6908
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6909
      nodelist = [self.op.pnode]
6910
      if self.op.snode is not None:
6911
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6912
        nodelist.append(self.op.snode)
6913
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6914

    
6915
    # in case of import lock the source node too
6916
    if self.op.mode == constants.INSTANCE_IMPORT:
6917
      src_node = self.op.src_node
6918
      src_path = self.op.src_path
6919

    
6920
      if src_path is None:
6921
        self.op.src_path = src_path = self.op.instance_name
6922

    
6923
      if src_node is None:
6924
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6925
        self.op.src_node = None
6926
        if os.path.isabs(src_path):
6927
          raise errors.OpPrereqError("Importing an instance from an absolute"
6928
                                     " path requires a source node option.",
6929
                                     errors.ECODE_INVAL)
6930
      else:
6931
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6932
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6933
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6934
        if not os.path.isabs(src_path):
6935
          self.op.src_path = src_path = \
6936
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6937

    
6938
  def _RunAllocator(self):
6939
    """Run the allocator based on input opcode.
6940

6941
    """
6942
    nics = [n.ToDict() for n in self.nics]
6943
    ial = IAllocator(self.cfg, self.rpc,
6944
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6945
                     name=self.op.instance_name,
6946
                     disk_template=self.op.disk_template,
6947
                     tags=[],
6948
                     os=self.op.os_type,
6949
                     vcpus=self.be_full[constants.BE_VCPUS],
6950
                     mem_size=self.be_full[constants.BE_MEMORY],
6951
                     disks=self.disks,
6952
                     nics=nics,
6953
                     hypervisor=self.op.hypervisor,
6954
                     )
6955

    
6956
    ial.Run(self.op.iallocator)
6957

    
6958
    if not ial.success:
6959
      raise errors.OpPrereqError("Can't compute nodes using"
6960
                                 " iallocator '%s': %s" %
6961
                                 (self.op.iallocator, ial.info),
6962
                                 errors.ECODE_NORES)
6963
    if len(ial.result) != ial.required_nodes:
6964
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6965
                                 " of nodes (%s), required %s" %
6966
                                 (self.op.iallocator, len(ial.result),
6967
                                  ial.required_nodes), errors.ECODE_FAULT)
6968
    self.op.pnode = ial.result[0]
6969
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6970
                 self.op.instance_name, self.op.iallocator,
6971
                 utils.CommaJoin(ial.result))
6972
    if ial.required_nodes == 2:
6973
      self.op.snode = ial.result[1]
6974

    
6975
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    If the opcode does not override some instance parameters, try to
    take them from the export information, if it declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
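
    # Illustrative sketch (assumed values; the section and option names are
    # the ones read above and in CheckPrereq) of the export file consumed by
    # _ReadExportInfo/_ReadExportParams:
    #
    #   [<constants.INISECT_EXP>]
    #   version = <constants.EXPORT_VERSION>
    #   os = debootstrap+default
    #
    #   [<constants.INISECT_INS>]
    #   name = inst1.example.com
    #   disk_template = plain
    #   disk_count = 1
    #   disk0_size = 10240
    #   disk0_dump = disk0.snap
    #   nic_count = 1
    #   nic0_mac = aa:00:00:dd:ee:ff
    #   hypervisor = xen-pvm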
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
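
    # Net effect (sketch): any parameter whose requested value already equals
    # the cluster default is dropped from the per-instance dicts, so the
    # instance keeps tracking the cluster default instead of pinning the
    # value explicitly.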
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      vg = disk.get("vg", self.cfg.GetVGName())
      new_disk = {"size": size, "mode": mode, "vg": vg}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have fewer disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating the mac address here, both the allocator and the hooks
    # get the real final mac address rather than the 'auto' or 'generate'
    # value. There is a race condition between the generation and the
    # instance object creation, which means that we know the mac is valid
    # now, but we're not sure it will be when we actually add the instance.
    # If things go bad, adding the instance will abort because of a duplicate
    # mac, and the creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    else: # instead, we must check the adoption data
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names[pnode.name].payload.keys()
                                      )[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
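      # Note (derived from the uses below): each node_lvs value is a tuple
      # whose first field is the LV size (used directly as the disk size) and
      # whose third field is truthy when the LV is online/in use; the keys
      # are "<vg>/<lv>" strings, matching the entries of all_lvs.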

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
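        # At this point (sketch, hypothetical names): rename_to holds the
        # freshly generated LV name for this disk, while t_dsk.logical_id now
        # points at the adopted LV, so the blockdev rename below moves e.g.
        # "xenvg/myexistingdata" onto the instance's own LV name.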
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
        feedback_fn("* wiping instance disks...")
        try:
          _WipeDisks(self, iobj)
        except errors.OpExecError:
          self.LogWarning("Device wiping failed, reverting...")
          try:
            _RemoveDisks(self, iobj)
          finally:
            self.cfg.ReleaseDRBDMinors(instance)
            raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make
        # a connection. In some cases stopping an instance can take a long
        # time, hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    console = objects.InstanceConsole(instance=instance.name,
                                      kind=constants.CONS_SSH,
                                      host=node,
                                      user="root",
                                      command=console_cmd)
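    # Roughly what the caller gets back (illustrative values): the serialized
    # InstanceConsole object, i.e. something like
    #   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
    #    "host": <primary node>, "user": "root", "command": <console command>}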

    assert console.Validate()

    return console.ToDict()


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
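    # (sketch of the accepted combinations: REPLACE_DISK_CHG needs exactly
    # one of remote_node/iallocator; every other mode requires both unset)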
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because during node evacuation the iallocator was
    only called with an unmodified cluster model, not taking planned changes
    into account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
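      # iv_names thus maps the DRBD device's iv_name (e.g. "disk/0") to a
      # (drbd_dev, current_children, replacement_lvs) triple; _CheckDevices
      # and _RemoveOldStorage below consume this mapping (names illustrative).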

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)
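      # Illustrative example (hypothetical names): for an old LV whose
      # physical_id is ("xenvg", "old_data"), ren_fn(lv, temp_suffix) yields
      # ("xenvg", "old_data_replaced-<temp_suffix>"), i.e. the "rename old
      # LVs" step from the docstring above.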

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
8287
    """Replace the secondary node for DRBD 8.
8288

8289
    The algorithm for replace is quite complicated:
8290
      - for all disks of the instance:
8291
        - create new LVs on the new node with same names
8292
        - shutdown the drbd device on the old secondary
8293
        - disconnect the drbd network on the primary
8294
        - create the drbd device on the new secondary
8295
        - network attach the drbd on the primary, using an artifice:
8296
          the drbd code for Attach() will connect to the network if it
8297
          finds a device which is connected to the good local disks but
8298
          not network enabled
8299
      - wait for sync across all devices
8300
      - remove all disks from the old secondary
8301

8302
    Failures are not very well handled.
8303

8304
    """
8305
    steps_total = 6
8306

    
8307
    # Step: check device activation
8308
    self.lu.LogStep(1, steps_total, "Check device existence")
8309
    self._CheckDisksExistence([self.instance.primary_node])
8310
    self._CheckVolumeGroup([self.instance.primary_node])
8311

    
8312
    # Step: check other node consistency
8313
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8314
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8315

    
8316
    # Step: create new storage
8317
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8318
    for idx, dev in enumerate(self.instance.disks):
8319
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8320
                      (self.new_node, idx))
8321
      # we pass force_create=True to force LVM creation
8322
      for new_lv in dev.children:
8323
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8324
                        _GetInstanceInfoText(self.instance), False)
8325

    
8326
    # Step 4: dbrd minors and drbd setups changes
8327
    # after this, we must manually remove the drbd minors on both the
8328
    # error and the success paths
8329
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8330
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8331
                                         for dev in self.instance.disks],
8332
                                        self.instance.name)
8333
    logging.debug("Allocated minors %r", minors)
8334

    
8335
    iv_names = {}
8336
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8337
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8338
                      (self.new_node, idx))
8339
      # create new devices on new_node; note that we create two IDs:
8340
      # one without port, so the drbd will be activated without
8341
      # networking information on the new node at this stage, and one
8342
      # with network, for the latter activation in step 4
8343
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8344
      if self.instance.primary_node == o_node1:
8345
        p_minor = o_minor1
8346
      else:
8347
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8348
        p_minor = o_minor2
8349

    
8350
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8351
                      p_minor, new_minor, o_secret)
8352
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8353
                    p_minor, new_minor, o_secret)
8354

    
8355
      iv_names[idx] = (dev, dev.children, new_net_id)
8356
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8357
                    new_net_id)
8358
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8359
                              logical_id=new_alone_id,
8360
                              children=dev.children,
8361
                              size=dev.size)
8362
      try:
8363
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8364
                              _GetInstanceInfoText(self.instance), False)
8365
      except errors.GenericError:
8366
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8367
        raise
8368

    
8369
    # We have new devices, shutdown the drbd on the old secondary
8370
    for idx, dev in enumerate(self.instance.disks):
8371
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8372
      self.cfg.SetDiskID(dev, self.target_node)
8373
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8374
      if msg:
8375
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8376
                           "node: %s" % (idx, msg),
8377
                           hint=("Please cleanup this device manually as"
8378
                                 " soon as possible"))
8379

    
8380
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8381
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8382
                                               self.node_secondary_ip,
8383
                                               self.instance.disks)\
8384
                                              [self.instance.primary_node]
8385

    
8386
    msg = result.fail_msg
8387
    if msg:
8388
      # detaches didn't succeed (unlikely)
8389
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8390
      raise errors.OpExecError("Can't detach the disks from the network on"
8391
                               " old node: %s" % (msg,))
8392

    
8393
    # if we managed to detach at least one, we update all the disks of
8394
    # the instance to point to the new secondary
8395
    self.lu.LogInfo("Updating instance configuration")
8396
    for dev, _, new_logical_id in iv_names.itervalues():
8397
      dev.logical_id = new_logical_id
8398
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8399

    
8400
    self.cfg.Update(self.instance, feedback_fn)
8401

    
8402
    # and now perform the drbd attach
8403
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8404
                    " (standalone => connected)")
8405
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8406
                                            self.new_node],
8407
                                           self.node_secondary_ip,
8408
                                           self.instance.disks,
8409
                                           self.instance.name,
8410
                                           False)
8411
    for to_node, to_result in result.items():
8412
      msg = to_result.fail_msg
8413
      if msg:
8414
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8415
                           to_node, msg,
8416
                           hint=("please do a gnt-instance info to see the"
8417
                                 " status of disks"))
8418
    cstep = 5
8419
    if self.early_release:
8420
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8421
      cstep += 1
8422
      self._RemoveOldStorage(self.target_node, iv_names)
8423
      # WARNING: we release all node locks here, do not do other RPCs
8424
      # than WaitForSync to the primary node
8425
      self._ReleaseNodeLock([self.instance.primary_node,
8426
                             self.target_node,
8427
                             self.new_node])
8428

    
8429
    # Wait for sync
8430
    # This can fail as the old devices are degraded and _WaitForSync
8431
    # does a combined result over all disks, so we don't check its return value
8432
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8433
    cstep += 1
8434
    _WaitForSync(self.lu, self.instance)
8435

    
8436
    # Check all devices manually
8437
    self._CheckDevices(self.instance.primary_node, iv_names)
8438

    
8439
    # Step: remove old storage
8440
    if not self.early_release:
8441
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8442
      self._RemoveOldStorage(self.target_node, iv_names)
8443

    
8444

    
8445
class LURepairNodeStorage(NoHooksLU):
8446
  """Repairs the volume group on a node.
8447

8448
  """
8449
  REQ_BGL = False
8450

    
8451
  def CheckArguments(self):
8452
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8453

    
8454
    storage_type = self.op.storage_type
8455

    
8456
    if (constants.SO_FIX_CONSISTENCY not in
8457
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8458
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8459
                                 " repaired" % storage_type,
8460
                                 errors.ECODE_INVAL)
8461

    
8462
  def ExpandNames(self):
8463
    self.needed_locks = {
8464
      locking.LEVEL_NODE: [self.op.node_name],
8465
      }
8466

    
8467
  def _CheckFaultyDisks(self, instance, node_name):
8468
    """Ensure faulty disks abort the opcode or at least warn."""
8469
    try:
8470
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8471
                                  node_name, True):
8472
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8473
                                   " node '%s'" % (instance.name, node_name),
8474
                                   errors.ECODE_STATE)
8475
    except errors.OpPrereqError, err:
8476
      if self.op.ignore_consistency:
8477
        self.proc.LogWarning(str(err.args[0]))
8478
      else:
8479
        raise
8480

    
8481
  def CheckPrereq(self):
8482
    """Check prerequisites.
8483

8484
    """
8485
    # Check whether any instance on this node has faulty disks
8486
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8487
      if not inst.admin_up:
8488
        continue
8489
      check_nodes = set(inst.all_nodes)
8490
      check_nodes.discard(self.op.node_name)
8491
      for inst_node_name in check_nodes:
8492
        self._CheckFaultyDisks(inst, inst_node_name)
8493

    
8494
  def Exec(self, feedback_fn):
8495
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8496
                (self.op.name, self.op.node_name))
8497

    
8498
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8499
    result = self.rpc.call_storage_execute(self.op.node_name,
8500
                                           self.op.storage_type, st_args,
8501
                                           self.op.name,
8502
                                           constants.SO_FIX_CONSISTENCY)
8503
    result.Raise("Failed to repair storage unit '%s' on %s" %
8504
                 (self.op.name, self.op.node_name))
8505

    
8506

    
8507
class LUNodeEvacuationStrategy(NoHooksLU):
8508
  """Computes the node evacuation strategy.
8509

8510
  """
8511
  REQ_BGL = False
8512

    
8513
  def CheckArguments(self):
8514
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8515

    
8516
  def ExpandNames(self):
8517
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8518
    self.needed_locks = locks = {}
8519
    if self.op.remote_node is None:
8520
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8521
    else:
8522
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8523
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8524

    
8525
  def Exec(self, feedback_fn):
8526
    if self.op.remote_node is not None:
8527
      instances = []
8528
      for node in self.op.nodes:
8529
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8530
      result = []
8531
      for i in instances:
8532
        if i.primary_node == self.op.remote_node:
8533
          raise errors.OpPrereqError("Node %s is the primary node of"
8534
                                     " instance %s, cannot use it as"
8535
                                     " secondary" %
8536
                                     (self.op.remote_node, i.name),
8537
                                     errors.ECODE_INVAL)
8538
        result.append([i.name, self.op.remote_node])
8539
    else:
8540
      ial = IAllocator(self.cfg, self.rpc,
8541
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8542
                       evac_nodes=self.op.nodes)
8543
      ial.Run(self.op.iallocator, validate=True)
8544
      if not ial.success:
8545
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8546
                                 errors.ECODE_NORES)
8547
      result = ial.result
8548
    return result
8549

    
8550

    
8551
class LUGrowDisk(LogicalUnit):
8552
  """Grow a disk of an instance.
8553

8554
  """
8555
  HPATH = "disk-grow"
8556
  HTYPE = constants.HTYPE_INSTANCE
8557
  REQ_BGL = False
8558

    
8559
  def ExpandNames(self):
8560
    self._ExpandAndLockInstance()
8561
    self.needed_locks[locking.LEVEL_NODE] = []
8562
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8563

    
8564
  def DeclareLocks(self, level):
8565
    if level == locking.LEVEL_NODE:
8566
      self._LockInstancesNodes()
8567

    
8568
  def BuildHooksEnv(self):
8569
    """Build hooks env.
8570

8571
    This runs on the master, the primary and all the secondaries.
8572

8573
    """
8574
    env = {
8575
      "DISK": self.op.disk,
8576
      "AMOUNT": self.op.amount,
8577
      }
8578
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8579
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8580
    return env, nl, nl
8581

    
8582
  def CheckPrereq(self):
8583
    """Check prerequisites.
8584

8585
    This checks that the instance is in the cluster.
8586

8587
    """
8588
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8589
    assert instance is not None, \
8590
      "Cannot retrieve locked instance %s" % self.op.instance_name
8591
    nodenames = list(instance.all_nodes)
8592
    for node in nodenames:
8593
      _CheckNodeOnline(self, node)
8594

    
8595
    self.instance = instance
8596

    
8597
    if instance.disk_template not in constants.DTS_GROWABLE:
8598
      raise errors.OpPrereqError("Instance's disk layout does not support"
8599
                                 " growing.", errors.ECODE_INVAL)
8600

    
8601
    self.disk = instance.FindDisk(self.op.disk)
8602

    
8603
    if instance.disk_template != constants.DT_FILE:
8604
      # TODO: check the free disk space for file, when that feature
8605
      # will be supported
8606
      _CheckNodesFreeDiskPerVG(self, nodenames,
8607
                               {self.disk.physical_id[0]: self.op.amount})
8608

    
8609
  def Exec(self, feedback_fn):
8610
    """Execute disk grow.
8611

8612
    """
8613
    instance = self.instance
8614
    disk = self.disk
8615

    
8616
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8617
    if not disks_ok:
8618
      raise errors.OpExecError("Cannot activate block device to grow")
8619

    
8620
    for node in instance.all_nodes:
8621
      self.cfg.SetDiskID(disk, node)
8622
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8623
      result.Raise("Grow request failed to node %s" % node)
8624

    
8625
      # TODO: Rewrite code to work properly
8626
      # DRBD goes into sync mode for a short amount of time after executing the
8627
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8628
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8629
      # time is a work-around.
8630
      time.sleep(5)
8631

    
8632
    disk.RecordGrow(self.op.amount)
8633
    self.cfg.Update(instance, feedback_fn)
8634
    if self.op.wait_for_sync:
8635
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8636
      if disk_abort:
8637
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8638
                             " status.\nPlease check the instance.")
8639
      if not instance.admin_up:
8640
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8641
    elif not instance.admin_up:
8642
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8643
                           " not supposed to be running because no wait for"
8644
                           " sync mode was requested.")
8645

    
8646

    
8647
class LUQueryInstanceData(NoHooksLU):
8648
  """Query runtime instance data.
8649

8650
  """
8651
  REQ_BGL = False
8652

    
8653
  def ExpandNames(self):
8654
    self.needed_locks = {}
8655
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8656

    
8657
    if self.op.instances:
8658
      self.wanted_names = []
8659
      for name in self.op.instances:
8660
        full_name = _ExpandInstanceName(self.cfg, name)
8661
        self.wanted_names.append(full_name)
8662
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8663
    else:
8664
      self.wanted_names = None
8665
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8666

    
8667
    self.needed_locks[locking.LEVEL_NODE] = []
8668
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8669

    
8670
  def DeclareLocks(self, level):
8671
    if level == locking.LEVEL_NODE:
8672
      self._LockInstancesNodes()
8673

    
8674
  def CheckPrereq(self):
8675
    """Check prerequisites.
8676

8677
    This only checks the optional instance list against the existing names.
8678

8679
    """
8680
    if self.wanted_names is None:
8681
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8682

    
8683
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8684
                             in self.wanted_names]
8685

    
8686
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8687
    """Returns the status of a block device
8688

8689
    """
8690
    if self.op.static or not node:
8691
      return None
8692

    
8693
    self.cfg.SetDiskID(dev, node)
8694

    
8695
    result = self.rpc.call_blockdev_find(node, dev)
8696
    if result.offline:
8697
      return None
8698

    
8699
    result.Raise("Can't compute disk status for %s" % instance_name)
8700

    
8701
    status = result.payload
8702
    if status is None:
8703
      return None
8704

    
8705
    return (status.dev_path, status.major, status.minor,
8706
            status.sync_percent, status.estimated_time,
8707
            status.is_degraded, status.ldisk_status)
8708

    
8709
  def _ComputeDiskStatus(self, instance, snode, dev):
8710
    """Compute block device status.
8711

8712
    """
8713
    if dev.dev_type in constants.LDS_DRBD:
8714
      # we change the snode then (otherwise we use the one passed in)
8715
      if dev.logical_id[0] == instance.primary_node:
8716
        snode = dev.logical_id[1]
8717
      else:
8718
        snode = dev.logical_id[0]
8719

    
8720
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8721
                                              instance.name, dev)
8722
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8723

    
8724
    if dev.children:
8725
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8726
                      for child in dev.children]
8727
    else:
8728
      dev_children = []
8729

    
8730
    data = {
8731
      "iv_name": dev.iv_name,
8732
      "dev_type": dev.dev_type,
8733
      "logical_id": dev.logical_id,
8734
      "physical_id": dev.physical_id,
8735
      "pstatus": dev_pstatus,
8736
      "sstatus": dev_sstatus,
8737
      "children": dev_children,
8738
      "mode": dev.mode,
8739
      "size": dev.size,
8740
      }
8741

    
8742
    return data
8743

    
8744
  def Exec(self, feedback_fn):
8745
    """Gather and return data"""
8746
    result = {}
8747

    
8748
    cluster = self.cfg.GetClusterInfo()
8749

    
8750
    for instance in self.wanted_instances:
8751
      if not self.op.static:
8752
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8753
                                                  instance.name,
8754
                                                  instance.hypervisor)
8755
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8756
        remote_info = remote_info.payload
8757
        if remote_info and "state" in remote_info:
8758
          remote_state = "up"
8759
        else:
8760
          remote_state = "down"
8761
      else:
8762
        remote_state = None
8763
      if instance.admin_up:
8764
        config_state = "up"
8765
      else:
8766
        config_state = "down"
8767

    
8768
      disks = [self._ComputeDiskStatus(instance, None, device)
8769
               for device in instance.disks]
8770

    
8771
      idict = {
8772
        "name": instance.name,
8773
        "config_state": config_state,
8774
        "run_state": remote_state,
8775
        "pnode": instance.primary_node,
8776
        "snodes": instance.secondary_nodes,
8777
        "os": instance.os,
8778
        # this happens to be the same format used for hooks
8779
        "nics": _NICListToTuple(self, instance.nics),
8780
        "disk_template": instance.disk_template,
8781
        "disks": disks,
8782
        "hypervisor": instance.hypervisor,
8783
        "network_port": instance.network_port,
8784
        "hv_instance": instance.hvparams,
8785
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8786
        "be_instance": instance.beparams,
8787
        "be_actual": cluster.FillBE(instance),
8788
        "os_instance": instance.osparams,
8789
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8790
        "serial_no": instance.serial_no,
8791
        "mtime": instance.mtime,
8792
        "ctime": instance.ctime,
8793
        "uuid": instance.uuid,
8794
        }
8795

    
8796
      result[instance.name] = idict
8797

    
8798
    return result
8799

    
8800

    
8801
class LUSetInstanceParams(LogicalUnit):
8802
  """Modifies an instances's parameters.
8803

8804
  """
8805
  HPATH = "instance-modify"
8806
  HTYPE = constants.HTYPE_INSTANCE
8807
  REQ_BGL = False
8808

    
8809
  def CheckArguments(self):
8810
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8811
            self.op.hvparams or self.op.beparams or self.op.os_name):
8812
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8813

    
8814
    if self.op.hvparams:
8815
      _CheckGlobalHvParams(self.op.hvparams)
8816

    
8817
    # Disk validation
8818
    disk_addremove = 0
8819
    for disk_op, disk_dict in self.op.disks:
8820
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8821
      if disk_op == constants.DDM_REMOVE:
8822
        disk_addremove += 1
8823
        continue
8824
      elif disk_op == constants.DDM_ADD:
8825
        disk_addremove += 1
8826
      else:
8827
        if not isinstance(disk_op, int):
8828
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8829
        if not isinstance(disk_dict, dict):
8830
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8831
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8832

    
8833
      if disk_op == constants.DDM_ADD:
8834
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8835
        if mode not in constants.DISK_ACCESS_SET:
8836
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8837
                                     errors.ECODE_INVAL)
8838
        size = disk_dict.get('size', None)
8839
        if size is None:
8840
          raise errors.OpPrereqError("Required disk parameter size missing",
8841
                                     errors.ECODE_INVAL)
8842
        try:
8843
          size = int(size)
8844
        except (TypeError, ValueError), err:
8845
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8846
                                     str(err), errors.ECODE_INVAL)
8847
        disk_dict['size'] = size
8848
      else:
8849
        # modification of disk
8850
        if 'size' in disk_dict:
8851
          raise errors.OpPrereqError("Disk size change not possible, use"
8852
                                     " grow-disk", errors.ECODE_INVAL)
8853

    
8854
    if disk_addremove > 1:
8855
      raise errors.OpPrereqError("Only one disk add or remove operation"
8856
                                 " supported at a time", errors.ECODE_INVAL)
8857

    
8858
    if self.op.disks and self.op.disk_template is not None:
8859
      raise errors.OpPrereqError("Disk template conversion and other disk"
8860
                                 " changes not supported at the same time",
8861
                                 errors.ECODE_INVAL)
8862

    
8863
    if (self.op.disk_template and
8864
        self.op.disk_template in constants.DTS_NET_MIRROR and
8865
        self.op.remote_node is None):
8866
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
8867
                                 " one requires specifying a secondary node",
8868
                                 errors.ECODE_INVAL)
8869

    
8870
    # NIC validation
8871
    nic_addremove = 0
8872
    for nic_op, nic_dict in self.op.nics:
8873
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8874
      if nic_op == constants.DDM_REMOVE:
8875
        nic_addremove += 1
8876
        continue
8877
      elif nic_op == constants.DDM_ADD:
8878
        nic_addremove += 1
8879
      else:
8880
        if not isinstance(nic_op, int):
8881
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8882
        if not isinstance(nic_dict, dict):
8883
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8884
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8885

    
8886
      # nic_dict should be a dict
8887
      nic_ip = nic_dict.get('ip', None)
8888
      if nic_ip is not None:
8889
        if nic_ip.lower() == constants.VALUE_NONE:
8890
          nic_dict['ip'] = None
8891
        else:
8892
          if not netutils.IPAddress.IsValid(nic_ip):
8893
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8894
                                       errors.ECODE_INVAL)
8895

    
8896
      nic_bridge = nic_dict.get('bridge', None)
8897
      nic_link = nic_dict.get('link', None)
8898
      if nic_bridge and nic_link:
8899
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8900
                                   " at the same time", errors.ECODE_INVAL)
8901
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8902
        nic_dict['bridge'] = None
8903
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8904
        nic_dict['link'] = None
8905

    
8906
      if nic_op == constants.DDM_ADD:
8907
        nic_mac = nic_dict.get('mac', None)
8908
        if nic_mac is None:
8909
          nic_dict['mac'] = constants.VALUE_AUTO
8910

    
8911
      if 'mac' in nic_dict:
8912
        nic_mac = nic_dict['mac']
8913
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8914
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8915

    
8916
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8917
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8918
                                     " modifying an existing nic",
8919
                                     errors.ECODE_INVAL)
8920

    
8921
    if nic_addremove > 1:
8922
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8923
                                 " supported at a time", errors.ECODE_INVAL)
8924

    
8925
  def ExpandNames(self):
8926
    self._ExpandAndLockInstance()
8927
    self.needed_locks[locking.LEVEL_NODE] = []
8928
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8929

    
8930
  def DeclareLocks(self, level):
8931
    if level == locking.LEVEL_NODE:
8932
      self._LockInstancesNodes()
8933
      if self.op.disk_template and self.op.remote_node:
8934
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8935
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8936

    
8937
  def BuildHooksEnv(self):
8938
    """Build hooks env.
8939

8940
    This runs on the master, primary and secondaries.
8941

8942
    """
8943
    args = dict()
8944
    if constants.BE_MEMORY in self.be_new:
8945
      args['memory'] = self.be_new[constants.BE_MEMORY]
8946
    if constants.BE_VCPUS in self.be_new:
8947
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8948
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8949
    # information at all.
8950
    if self.op.nics:
8951
      args['nics'] = []
8952
      nic_override = dict(self.op.nics)
8953
      for idx, nic in enumerate(self.instance.nics):
8954
        if idx in nic_override:
8955
          this_nic_override = nic_override[idx]
8956
        else:
8957
          this_nic_override = {}
8958
        if 'ip' in this_nic_override:
8959
          ip = this_nic_override['ip']
8960
        else:
8961
          ip = nic.ip
8962
        if 'mac' in this_nic_override:
8963
          mac = this_nic_override['mac']
8964
        else:
8965
          mac = nic.mac
8966
        if idx in self.nic_pnew:
8967
          nicparams = self.nic_pnew[idx]
8968
        else:
8969
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8970
        mode = nicparams[constants.NIC_MODE]
8971
        link = nicparams[constants.NIC_LINK]
8972
        args['nics'].append((ip, mac, mode, link))
8973
      if constants.DDM_ADD in nic_override:
8974
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8975
        mac = nic_override[constants.DDM_ADD]['mac']
8976
        nicparams = self.nic_pnew[constants.DDM_ADD]
8977
        mode = nicparams[constants.NIC_MODE]
8978
        link = nicparams[constants.NIC_LINK]
8979
        args['nics'].append((ip, mac, mode, link))
8980
      elif constants.DDM_REMOVE in nic_override:
8981
        del args['nics'][-1]
8982

    
8983
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8984
    if self.op.disk_template:
8985
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8986
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8987
    return env, nl, nl
8988

    
8989
  def CheckPrereq(self):
8990
    """Check prerequisites.
8991

8992
    This only checks the instance list against the existing names.
8993

8994
    """
8995
    # checking the new params on the primary/secondary nodes
8996

    
8997
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8998
    cluster = self.cluster = self.cfg.GetClusterInfo()
8999
    assert self.instance is not None, \
9000
      "Cannot retrieve locked instance %s" % self.op.instance_name
9001
    pnode = instance.primary_node
9002
    nodelist = list(instance.all_nodes)
9003

    
9004
    # OS change
9005
    if self.op.os_name and not self.op.force:
9006
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9007
                      self.op.force_variant)
9008
      instance_os = self.op.os_name
9009
    else:
9010
      instance_os = instance.os
9011

    
9012
    if self.op.disk_template:
9013
      if instance.disk_template == self.op.disk_template:
9014
        raise errors.OpPrereqError("Instance already has disk template %s" %
9015
                                   instance.disk_template, errors.ECODE_INVAL)
9016

    
9017
      if (instance.disk_template,
9018
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9019
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9020
                                   " %s to %s" % (instance.disk_template,
9021
                                                  self.op.disk_template),
9022
                                   errors.ECODE_INVAL)
9023
      _CheckInstanceDown(self, instance, "cannot change disk template")
9024
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9025
        if self.op.remote_node == pnode:
9026
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9027
                                     " as the primary node of the instance" %
9028
                                     self.op.remote_node, errors.ECODE_STATE)
9029
        _CheckNodeOnline(self, self.op.remote_node)
9030
        _CheckNodeNotDrained(self, self.op.remote_node)
9031
        # FIXME: here we assume that the old instance type is DT_PLAIN
9032
        assert instance.disk_template == constants.DT_PLAIN
9033
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9034
                 for d in instance.disks]
9035
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9036
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9037

    
9038
    # hvparams processing
9039
    if self.op.hvparams:
9040
      hv_type = instance.hypervisor
9041
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9042
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9043
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9044

    
9045
      # local check
9046
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9047
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9048
      self.hv_new = hv_new # the new actual values
9049
      self.hv_inst = i_hvdict # the new dict (without defaults)
9050
    else:
9051
      self.hv_new = self.hv_inst = {}
9052

    
9053
    # beparams processing
9054
    if self.op.beparams:
9055
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9056
                                   use_none=True)
9057
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9058
      be_new = cluster.SimpleFillBE(i_bedict)
9059
      self.be_new = be_new # the new actual values
9060
      self.be_inst = i_bedict # the new dict (without defaults)
9061
    else:
9062
      self.be_new = self.be_inst = {}
9063

    
9064
    # osparams processing
9065
    if self.op.osparams:
9066
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9067
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9068
      self.os_inst = i_osdict # the new dict (without defaults)
9069
    else:
9070
      self.os_inst = {}
9071

    
9072
    self.warn = []
9073

    
9074
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9075
      mem_check_list = [pnode]
9076
      if be_new[constants.BE_AUTO_BALANCE]:
9077
        # either we changed auto_balance to yes or it was from before
9078
        mem_check_list.extend(instance.secondary_nodes)
9079
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9080
                                                  instance.hypervisor)
9081
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9082
                                         instance.hypervisor)
9083
      pninfo = nodeinfo[pnode]
9084
      msg = pninfo.fail_msg
9085
      if msg:
9086
        # Assume the primary node is unreachable and go ahead
9087
        self.warn.append("Can't get info from primary node %s: %s" %
9088
                         (pnode,  msg))
9089
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9090
        self.warn.append("Node data from primary node %s doesn't contain"
9091
                         " free memory information" % pnode)
9092
      elif instance_info.fail_msg:
9093
        self.warn.append("Can't get instance runtime information: %s" %
9094
                        instance_info.fail_msg)
9095
      else:
9096
        if instance_info.payload:
9097
          current_mem = int(instance_info.payload['memory'])
9098
        else:
9099
          # Assume instance not running
9100
          # (there is a slight race condition here, but it's not very probable,
9101
          # and we have no other way to check)
9102
          current_mem = 0
9103
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9104
                    pninfo.payload['memory_free'])
9105
        if miss_mem > 0:
9106
          raise errors.OpPrereqError("This change will prevent the instance"
9107
                                     " from starting, due to %d MB of memory"
9108
                                     " missing on its primary node" % miss_mem,
9109
                                     errors.ECODE_NORES)
9110

    
9111
      if be_new[constants.BE_AUTO_BALANCE]:
9112
        for node, nres in nodeinfo.items():
9113
          if node not in instance.secondary_nodes:
9114
            continue
9115
          msg = nres.fail_msg
9116
          if msg:
9117
            self.warn.append("Can't get info from secondary node %s: %s" %
9118
                             (node, msg))
9119
          elif not isinstance(nres.payload.get('memory_free', None), int):
9120
            self.warn.append("Secondary node %s didn't return free"
9121
                             " memory information" % node)
9122
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9123
            self.warn.append("Not enough memory to failover instance to"
9124
                             " secondary node %s" % node)
9125

    
9126
    # NIC processing
9127
    self.nic_pnew = {}
9128
    self.nic_pinst = {}
9129
    for nic_op, nic_dict in self.op.nics:
9130
      if nic_op == constants.DDM_REMOVE:
9131
        if not instance.nics:
9132
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9133
                                     errors.ECODE_INVAL)
9134
        continue
9135
      if nic_op != constants.DDM_ADD:
9136
        # an existing nic
9137
        if not instance.nics:
9138
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9139
                                     " no NICs" % nic_op,
9140
                                     errors.ECODE_INVAL)
9141
        if nic_op < 0 or nic_op >= len(instance.nics):
9142
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9143
                                     " are 0 to %d" %
9144
                                     (nic_op, len(instance.nics) - 1),
9145
                                     errors.ECODE_INVAL)
9146
        old_nic_params = instance.nics[nic_op].nicparams
9147
        old_nic_ip = instance.nics[nic_op].ip
9148
      else:
9149
        old_nic_params = {}
9150
        old_nic_ip = None
9151

    
9152
      update_params_dict = dict([(key, nic_dict[key])
9153
                                 for key in constants.NICS_PARAMETERS
9154
                                 if key in nic_dict])
9155

    
9156
      if 'bridge' in nic_dict:
9157
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9158

    
9159
      new_nic_params = _GetUpdatedParams(old_nic_params,
9160
                                         update_params_dict)
9161
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9162
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9163
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9164
      self.nic_pinst[nic_op] = new_nic_params
9165
      self.nic_pnew[nic_op] = new_filled_nic_params
9166
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9167

    
9168
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9169
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9170
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9171
        if msg:
9172
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9173
          if self.op.force:
9174
            self.warn.append(msg)
9175
          else:
9176
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9177
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9178
        if 'ip' in nic_dict:
9179
          nic_ip = nic_dict['ip']
9180
        else:
9181
          nic_ip = old_nic_ip
9182
        if nic_ip is None:
9183
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9184
                                     ' on a routed nic', errors.ECODE_INVAL)
9185
      if 'mac' in nic_dict:
9186
        nic_mac = nic_dict['mac']
9187
        if nic_mac is None:
9188
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9189
                                     errors.ECODE_INVAL)
9190
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9191
          # otherwise generate the mac
9192
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9193
        else:
9194
          # or validate/reserve the current one
9195
          try:
9196
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9197
          except errors.ReservationError:
9198
            raise errors.OpPrereqError("MAC address %s already in use"
9199
                                       " in cluster" % nic_mac,
9200
                                       errors.ECODE_NOTUNIQUE)
9201

    
9202
    # DISK processing
9203
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9204
      raise errors.OpPrereqError("Disk operations not supported for"
9205
                                 " diskless instances",
9206
                                 errors.ECODE_INVAL)
9207
    for disk_op, _ in self.op.disks:
9208
      if disk_op == constants.DDM_REMOVE:
9209
        if len(instance.disks) == 1:
9210
          raise errors.OpPrereqError("Cannot remove the last disk of"
9211
                                     " an instance", errors.ECODE_INVAL)
9212
        _CheckInstanceDown(self, instance, "cannot remove disks")
9213

    
9214
      if (disk_op == constants.DDM_ADD and
9215
          len(instance.nics) >= constants.MAX_DISKS):
9216
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9217
                                   " add more" % constants.MAX_DISKS,
9218
                                   errors.ECODE_STATE)
9219
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9220
        # an existing disk
9221
        if disk_op < 0 or disk_op >= len(instance.disks):
9222
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9223
                                     " are 0 to %d" %
9224
                                     (disk_op, len(instance.disks)),
9225
                                     errors.ECODE_INVAL)
9226

    
9227
    return
9228

    
9229
  def _ConvertPlainToDrbd(self, feedback_fn):
9230
    """Converts an instance from plain to drbd.
9231

9232
    """
9233
    feedback_fn("Converting template to drbd")
9234
    instance = self.instance
9235
    pnode = instance.primary_node
9236
    snode = self.op.remote_node
9237

    
9238
    # create a fake disk info for _GenerateDiskTemplate
9239
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9240
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9241
                                      instance.name, pnode, [snode],
9242
                                      disk_info, None, None, 0, feedback_fn)
9243
    info = _GetInstanceInfoText(instance)
9244
    feedback_fn("Creating aditional volumes...")
9245
    # first, create the missing data and meta devices
9246
    for disk in new_disks:
9247
      # unfortunately this is... not too nice
9248
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9249
                            info, True)
9250
      for child in disk.children:
9251
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9252
    # at this stage, all new LVs have been created, we can rename the
9253
    # old ones
9254
    feedback_fn("Renaming original volumes...")
9255
    rename_list = [(o, n.children[0].logical_id)
9256
                   for (o, n) in zip(instance.disks, new_disks)]
9257
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9258
    result.Raise("Failed to rename original LVs")
9259

    
9260
    feedback_fn("Initializing DRBD devices...")
9261
    # all child devices are in place, we can now create the DRBD devices
9262
    for disk in new_disks:
9263
      for node in [pnode, snode]:
9264
        f_create = node == pnode
9265
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9266

    
9267
    # at this point, the instance has been modified
9268
    instance.disk_template = constants.DT_DRBD8
9269
    instance.disks = new_disks
9270
    self.cfg.Update(instance, feedback_fn)
9271

    
9272
    # disks are created, waiting for sync
9273
    disk_abort = not _WaitForSync(self, instance)
9274
    if disk_abort:
9275
      raise errors.OpExecError("There are some degraded disks for"
9276
                               " this instance, please cleanup manually")
9277

    
9278
  def _ConvertDrbdToPlain(self, feedback_fn):
9279
    """Converts an instance from drbd to plain.
9280

9281
    """
9282
    instance = self.instance
9283
    assert len(instance.secondary_nodes) == 1
9284
    pnode = instance.primary_node
9285
    snode = instance.secondary_nodes[0]
9286
    feedback_fn("Converting template to plain")
9287

    
9288
    old_disks = instance.disks
9289
    new_disks = [d.children[0] for d in old_disks]
9290

    
9291
    # copy over size and mode
9292
    for parent, child in zip(old_disks, new_disks):
9293
      child.size = parent.size
9294
      child.mode = parent.mode
9295

    
9296
    # update instance structure
9297
    instance.disks = new_disks
9298
    instance.disk_template = constants.DT_PLAIN
9299
    self.cfg.Update(instance, feedback_fn)
9300

    
9301
    feedback_fn("Removing volumes on the secondary node...")
9302
    for disk in old_disks:
9303
      self.cfg.SetDiskID(disk, snode)
9304
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9305
      if msg:
9306
        self.LogWarning("Could not remove block device %s on node %s,"
9307
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9308

    
9309
    feedback_fn("Removing unneeded volumes on the primary node...")
9310
    for idx, disk in enumerate(old_disks):
9311
      meta = disk.children[1]
9312
      self.cfg.SetDiskID(meta, pnode)
9313
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9314
      if msg:
9315
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9316
                        " continuing anyway: %s", idx, pnode, msg)
9317

    
9318
  def Exec(self, feedback_fn):
9319
    """Modifies an instance.
9320

9321
    All parameters take effect only at the next restart of the instance.
9322

9323
    """
9324
    # Process here the warnings from CheckPrereq, as we don't have a
9325
    # feedback_fn there.
9326
    for warn in self.warn:
9327
      feedback_fn("WARNING: %s" % warn)
9328

    
9329
    result = []
9330
    instance = self.instance
9331
    # disk changes
9332
    for disk_op, disk_dict in self.op.disks:
9333
      if disk_op == constants.DDM_REMOVE:
9334
        # remove the last disk
9335
        device = instance.disks.pop()
9336
        device_idx = len(instance.disks)
9337
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9338
          self.cfg.SetDiskID(disk, node)
9339
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9340
          if msg:
9341
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9342
                            " continuing anyway", device_idx, node, msg)
9343
        result.append(("disk/%d" % device_idx, "remove"))
9344
      elif disk_op == constants.DDM_ADD:
9345
        # add a new disk
9346
        if instance.disk_template == constants.DT_FILE:
9347
          file_driver, file_path = instance.disks[0].logical_id
9348
          file_path = os.path.dirname(file_path)
9349
        else:
9350
          file_driver = file_path = None
9351
        disk_idx_base = len(instance.disks)
9352
        new_disk = _GenerateDiskTemplate(self,
9353
                                         instance.disk_template,
9354
                                         instance.name, instance.primary_node,
9355
                                         instance.secondary_nodes,
9356
                                         [disk_dict],
9357
                                         file_path,
9358
                                         file_driver,
9359
                                         disk_idx_base, feedback_fn)[0]
9360
        instance.disks.append(new_disk)
9361
        info = _GetInstanceInfoText(instance)
9362

    
9363
        logging.info("Creating volume %s for instance %s",
9364
                     new_disk.iv_name, instance.name)
9365
        # Note: this needs to be kept in sync with _CreateDisks
9366
        #HARDCODE
9367
        for node in instance.all_nodes:
9368
          f_create = node == instance.primary_node
9369
          try:
9370
            _CreateBlockDev(self, node, instance, new_disk,
9371
                            f_create, info, f_create)
9372
          except errors.OpExecError, err:
9373
            self.LogWarning("Failed to create volume %s (%s) on"
9374
                            " node %s: %s",
9375
                            new_disk.iv_name, new_disk, node, err)
9376
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9377
                       (new_disk.size, new_disk.mode)))
9378
      else:
9379
        # change a given disk
9380
        instance.disks[disk_op].mode = disk_dict['mode']
9381
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9382

    
9383
    if self.op.disk_template:
9384
      r_shut = _ShutdownInstanceDisks(self, instance)
9385
      if not r_shut:
9386
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9387
                                 " proceed with disk template conversion")
9388
      mode = (instance.disk_template, self.op.disk_template)
9389
      try:
9390
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9391
      except:
9392
        self.cfg.ReleaseDRBDMinors(instance.name)
9393
        raise
9394
      result.append(("disk_template", self.op.disk_template))
9395

    
9396
    # NIC changes
9397
    for nic_op, nic_dict in self.op.nics:
9398
      if nic_op == constants.DDM_REMOVE:
9399
        # remove the last nic
9400
        del instance.nics[-1]
9401
        result.append(("nic.%d" % len(instance.nics), "remove"))
9402
      elif nic_op == constants.DDM_ADD:
9403
        # mac and bridge should be set, by now
9404
        mac = nic_dict['mac']
9405
        ip = nic_dict.get('ip', None)
9406
        nicparams = self.nic_pinst[constants.DDM_ADD]
9407
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9408
        instance.nics.append(new_nic)
9409
        result.append(("nic.%d" % (len(instance.nics) - 1),
9410
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9411
                       (new_nic.mac, new_nic.ip,
9412
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9413
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9414
                       )))
9415
      else:
9416
        for key in 'mac', 'ip':
9417
          if key in nic_dict:
9418
            setattr(instance.nics[nic_op], key, nic_dict[key])
9419
        if nic_op in self.nic_pinst:
9420
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9421
        for key, val in nic_dict.iteritems():
9422
          result.append(("nic.%s/%d" % (key, nic_op), val))
9423

    
9424
    # hvparams changes
9425
    if self.op.hvparams:
9426
      instance.hvparams = self.hv_inst
9427
      for key, val in self.op.hvparams.iteritems():
9428
        result.append(("hv/%s" % key, val))
9429

    
9430
    # beparams changes
9431
    if self.op.beparams:
9432
      instance.beparams = self.be_inst
9433
      for key, val in self.op.beparams.iteritems():
9434
        result.append(("be/%s" % key, val))
9435

    
9436
    # OS change
9437
    if self.op.os_name:
9438
      instance.os = self.op.os_name
9439

    
9440
    # osparams changes
9441
    if self.op.osparams:
9442
      instance.osparams = self.os_inst
9443
      for key, val in self.op.osparams.iteritems():
9444
        result.append(("os/%s" % key, val))
9445

    
9446
    self.cfg.Update(instance, feedback_fn)
9447

    
9448
    return result
9449

    
9450
  _DISK_CONVERSIONS = {
9451
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9452
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9453
    }
9454

    
9455

    
9456
class LUQueryExports(NoHooksLU):
9457
  """Query the exports list
9458

9459
  """
9460
  REQ_BGL = False
9461

    
9462
  def ExpandNames(self):
9463
    self.needed_locks = {}
9464
    self.share_locks[locking.LEVEL_NODE] = 1
9465
    if not self.op.nodes:
9466
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9467
    else:
9468
      self.needed_locks[locking.LEVEL_NODE] = \
9469
        _GetWantedNodes(self, self.op.nodes)
9470

    
9471
  def Exec(self, feedback_fn):
9472
    """Compute the list of all the exported system images.
9473

9474
    @rtype: dict
9475
    @return: a dictionary with the structure node->(export-list)
9476
        where export-list is a list of the instances exported on
9477
        that node.
9478

9479
    """
9480
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9481
    rpcresult = self.rpc.call_export_list(self.nodes)
9482
    result = {}
9483
    for node in rpcresult:
9484
      if rpcresult[node].fail_msg:
9485
        result[node] = False
9486
      else:
9487
        result[node] = rpcresult[node].payload
9488

    
9489
    return result
9490

    
9491

    
9492
class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


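# Illustrative sketch (not in the original source): for remote exports,
# LUPrepareExport.Exec above returns roughly the following structure, which
# the destination side needs before the actual transfer starts:
#
#   {"handshake": <value of ComputeRemoteExportHandshake(cds)>,
#    "x509_key_name": (name, hmac_of_name, salt),
#    "x509_ca": <signed PEM of the newly created certificate>}
#
# Both the HMAC and the certificate signature are keyed on the cluster domain
# secret, so a receiver that knows the same secret can verify them.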
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl

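  # Note (added commentary, not in the original source): as elsewhere in this
  # module, BuildHooksEnv above returns an (env, pre_nodes, post_nodes) triple;
  # here both hook phases run on the master and the instance's primary node,
  # plus the target node for local exports.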
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal;
    # if we proceed, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point the export was successful; we can clean up and finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


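# Note (added commentary, not in the original source): LUExportInstance.Exec
# above returns (fin_resu, dresults): fin_resu is the boolean result of
# finalizing the export, and dresults holds one boolean per instance disk, in
# disk order, so callers can report which individual disks failed to export.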
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUAddGroup(LogicalUnit):
9879
  """Logical unit for creating node groups.
9880

9881
  """
9882
  HPATH = "group-add"
9883
  HTYPE = constants.HTYPE_GROUP
9884
  REQ_BGL = False
9885

    
9886
  def ExpandNames(self):
9887
    # We need the new group's UUID here so that we can create and acquire the
9888
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
9889
    # that it should not check whether the UUID exists in the configuration.
9890
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
9891
    self.needed_locks = {}
9892
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
9893

    
9894
  def CheckPrereq(self):
9895
    """Check prerequisites.
9896

9897
    This checks that the given group name is not an existing node group
9898
    already.
9899

9900
    """
9901
    try:
9902
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
9903
    except errors.OpPrereqError:
9904
      pass
9905
    else:
9906
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
9907
                                 " node group (UUID: %s)" %
9908
                                 (self.op.group_name, existing_uuid),
9909
                                 errors.ECODE_EXISTS)
9910

    
9911
    if self.op.ndparams:
9912
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
9913

    
9914
  def BuildHooksEnv(self):
9915
    """Build hooks env.
9916

9917
    """
9918
    env = {
9919
      "GROUP_NAME": self.op.group_name,
9920
      }
9921
    mn = self.cfg.GetMasterNode()
9922
    return env, [mn], [mn]
9923

    
9924
  def Exec(self, feedback_fn):
9925
    """Add the node group to the cluster.
9926

9927
    """
9928
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
9929
                                  uuid=self.group_uuid,
9930
                                  alloc_policy=self.op.alloc_policy,
9931
                                  ndparams=self.op.ndparams)
9932

    
9933
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
9934
    del self.remove_locks[locking.LEVEL_NODEGROUP]
9935

    
9936

    
9937
class _GroupQuery(_QueryBase):
9938

    
9939
  FIELDS = query.GROUP_FIELDS
9940

    
9941
  def ExpandNames(self, lu):
9942
    lu.needed_locks = {}
9943

    
9944
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
9945
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
9946

    
9947
    if not self.names:
9948
      self.wanted = [name_to_uuid[name]
9949
                     for name in utils.NiceSort(name_to_uuid.keys())]
9950
    else:
9951
      # Accept names to be either names or UUIDs.
9952
      missing = []
9953
      self.wanted = []
9954
      all_uuid = frozenset(self._all_groups.keys())
9955

    
9956
      for name in self.names:
9957
        if name in all_uuid:
9958
          self.wanted.append(name)
9959
        elif name in name_to_uuid:
9960
          self.wanted.append(name_to_uuid[name])
9961
        else:
9962
          missing.append(name)
9963

    
9964
      if missing:
9965
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
9966
                                   errors.ECODE_NOENT)
9967

    
9968
  def DeclareLocks(self, lu, level):
9969
    pass
9970

    
9971
  def _GetQueryData(self, lu):
9972
    """Computes the list of node groups and their attributes.
9973

9974
    """
9975
    do_nodes = query.GQ_NODE in self.requested_data
9976
    do_instances = query.GQ_INST in self.requested_data
9977

    
9978
    group_to_nodes = None
9979
    group_to_instances = None
9980

    
9981
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
9982
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
9983
    # latter GetAllInstancesInfo() is not enough, for we have to go through
9984
    # instance->node. Hence, we will need to process nodes even if we only need
9985
    # instance information.
9986
    if do_nodes or do_instances:
9987
      all_nodes = lu.cfg.GetAllNodesInfo()
9988
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
9989
      node_to_group = {}
9990

    
9991
      for node in all_nodes.values():
9992
        if node.group in group_to_nodes:
9993
          group_to_nodes[node.group].append(node.name)
9994
          node_to_group[node.name] = node.group
9995

    
9996
      if do_instances:
9997
        all_instances = lu.cfg.GetAllInstancesInfo()
9998
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
9999

    
10000
        for instance in all_instances.values():
10001
          node = instance.primary_node
10002
          if node in node_to_group:
10003
            group_to_instances[node_to_group[node]].append(instance.name)
10004

    
10005
        if not do_nodes:
10006
          # Do not pass on node information if it was not requested.
10007
          group_to_nodes = None
10008

    
10009
    return query.GroupQueryData([self._all_groups[uuid]
10010
                                 for uuid in self.wanted],
10011
                                group_to_nodes, group_to_instances)
10012

    
10013

    
10014
class LUQueryGroups(NoHooksLU):
10015
  """Logical unit for querying node groups.
10016

10017
  """
10018
  REQ_BGL = False
10019

    
10020
  def CheckArguments(self):
10021
    self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10022

    
10023
  def ExpandNames(self):
10024
    self.gq.ExpandNames(self)
10025

    
10026
  def Exec(self, feedback_fn):
10027
    return self.gq.OldStyleQuery(self)
10028

    
10029

    
10030
class LUSetGroupParams(LogicalUnit):
10031
  """Modifies the parameters of a node group.
10032

10033
  """
10034
  HPATH = "group-modify"
10035
  HTYPE = constants.HTYPE_GROUP
10036
  REQ_BGL = False
10037

    
10038
  def CheckArguments(self):
10039
    all_changes = [
10040
      self.op.ndparams,
10041
      self.op.alloc_policy,
10042
      ]
10043

    
10044
    if all_changes.count(None) == len(all_changes):
10045
      raise errors.OpPrereqError("Please pass at least one modification",
10046
                                 errors.ECODE_INVAL)
10047

    
10048
  def ExpandNames(self):
10049
    # This raises errors.OpPrereqError on its own:
10050
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10051

    
10052
    self.needed_locks = {
10053
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10054
      }
10055

    
10056
  def CheckPrereq(self):
10057
    """Check prerequisites.
10058

10059
    """
10060
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10061

    
10062
    if self.group is None:
10063
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10064
                               (self.op.group_name, self.group_uuid))
10065

    
10066
    if self.op.ndparams:
10067
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10068
      self.new_ndparams = self.group.SimpleFillND(self.op.ndparams)
10069

    
10070
  def BuildHooksEnv(self):
10071
    """Build hooks env.
10072

10073
    """
10074
    env = {
10075
      "GROUP_NAME": self.op.group_name,
10076
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
10077
      }
10078
    mn = self.cfg.GetMasterNode()
10079
    return env, [mn], [mn]
10080

    
10081
  def Exec(self, feedback_fn):
10082
    """Modifies the node group.
10083

10084
    """
10085
    result = []
10086

    
10087
    if self.op.ndparams:
10088
      self.group.ndparams = self.new_ndparams
10089
      result.append(("ndparams", str(self.group.ndparams)))
10090

    
10091
    if self.op.alloc_policy:
10092
      self.group.alloc_policy = self.op.alloc_policy
10093

    
10094
    self.cfg.Update(self.group, feedback_fn)
10095
    return result
10096

    
10097

    
10098

    
10099
class LURemoveGroup(LogicalUnit):
10100
  HPATH = "group-remove"
10101
  HTYPE = constants.HTYPE_GROUP
10102
  REQ_BGL = False
10103

    
10104
  def ExpandNames(self):
10105
    # This will raise errors.OpPrereqError on its own:
10106
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10107
    self.needed_locks = {
10108
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10109
      }
10110

    
10111
  def CheckPrereq(self):
10112
    """Check prerequisites.
10113

10114
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
10117

10118
    """
10119
    # Verify that the group is empty.
10120
    group_nodes = [node.name
10121
                   for node in self.cfg.GetAllNodesInfo().values()
10122
                   if node.group == self.group_uuid]
10123

    
10124
    if group_nodes:
10125
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
10126
                                 " nodes: %s" %
10127
                                 (self.op.group_name,
10128
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
10129
                                 errors.ECODE_STATE)
10130

    
10131
    # Verify the cluster would not be left group-less.
10132
    if len(self.cfg.GetNodeGroupList()) == 1:
10133
      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10134
                                 " which cannot be left without at least one"
10135
                                 " group" % self.op.group_name,
10136
                                 errors.ECODE_STATE)
10137

    
10138
  def BuildHooksEnv(self):
10139
    """Build hooks env.
10140

10141
    """
10142
    env = {
10143
      "GROUP_NAME": self.op.group_name,
10144
      }
10145
    mn = self.cfg.GetMasterNode()
10146
    return env, [mn], [mn]
10147

    
10148
  def Exec(self, feedback_fn):
10149
    """Remove the node group.
10150

10151
    """
10152
    try:
10153
      self.cfg.RemoveNodeGroup(self.group_uuid)
10154
    except errors.ConfigurationError:
10155
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10156
                               (self.op.group_name, self.group_uuid))
10157

    
10158
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10159

    
10160

    
10161
class LURenameGroup(LogicalUnit):
10162
  HPATH = "group-rename"
10163
  HTYPE = constants.HTYPE_GROUP
10164
  REQ_BGL = False
10165

    
10166
  def ExpandNames(self):
10167
    # This raises errors.OpPrereqError on its own:
10168
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10169

    
10170
    self.needed_locks = {
10171
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10172
      }
10173

    
10174
  def CheckPrereq(self):
10175
    """Check prerequisites.
10176

10177
    This checks that the given old_name exists as a node group, and that
10178
    new_name doesn't.
10179

10180
    """
10181
    try:
10182
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10183
    except errors.OpPrereqError:
10184
      pass
10185
    else:
10186
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10187
                                 " node group (UUID: %s)" %
10188
                                 (self.op.new_name, new_name_uuid),
10189
                                 errors.ECODE_EXISTS)
10190

    
10191
  def BuildHooksEnv(self):
10192
    """Build hooks env.
10193

10194
    """
10195
    env = {
10196
      "OLD_NAME": self.op.old_name,
10197
      "NEW_NAME": self.op.new_name,
10198
      }
10199

    
10200
    mn = self.cfg.GetMasterNode()
10201
    all_nodes = self.cfg.GetAllNodesInfo()
10202
    run_nodes = [mn]
10203
    all_nodes.pop(mn, None)
10204

    
10205
    for node in all_nodes.values():
10206
      if node.group == self.group_uuid:
10207
        run_nodes.append(node.name)
10208

    
10209
    return env, run_nodes, run_nodes
10210

    
10211
  def Exec(self, feedback_fn):
10212
    """Rename the node group.
10213

10214
    """
10215
    group = self.cfg.GetNodeGroup(self.group_uuid)
10216

    
10217
    if group is None:
10218
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10219
                               (self.op.old_name, self.group_uuid))
10220

    
10221
    group.name = self.op.new_name
10222
    self.cfg.Update(group, feedback_fn)
10223

    
10224
    return self.op.new_name
10225

    
10226

    
10227
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10228
  """Generic tags LU.
10229

10230
  This is an abstract class which is the parent of all the other tags LUs.
10231

10232
  """
10233

    
10234
  def ExpandNames(self):
10235
    self.needed_locks = {}
10236
    if self.op.kind == constants.TAG_NODE:
10237
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10238
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10239
    elif self.op.kind == constants.TAG_INSTANCE:
10240
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10241
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10242

    
10243
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10244
    # not possible to acquire the BGL based on opcode parameters)
10245

    
10246
  def CheckPrereq(self):
10247
    """Check prerequisites.
10248

10249
    """
10250
    if self.op.kind == constants.TAG_CLUSTER:
10251
      self.target = self.cfg.GetClusterInfo()
10252
    elif self.op.kind == constants.TAG_NODE:
10253
      self.target = self.cfg.GetNodeInfo(self.op.name)
10254
    elif self.op.kind == constants.TAG_INSTANCE:
10255
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10256
    else:
10257
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10258
                                 str(self.op.kind), errors.ECODE_INVAL)
10259

    
10260

    
10261
class LUGetTags(TagsLU):
10262
  """Returns the tags of a given object.
10263

10264
  """
10265
  REQ_BGL = False
10266

    
10267
  def ExpandNames(self):
10268
    TagsLU.ExpandNames(self)
10269

    
10270
    # Share locks as this is only a read operation
10271
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10272

    
10273
  def Exec(self, feedback_fn):
10274
    """Returns the tag list.
10275

10276
    """
10277
    return list(self.target.GetTags())
10278

    
10279

    
10280
class LUSearchTags(NoHooksLU):
10281
  """Searches the tags for a given pattern.
10282

10283
  """
10284
  REQ_BGL = False
10285

    
10286
  def ExpandNames(self):
10287
    self.needed_locks = {}
10288

    
10289
  def CheckPrereq(self):
10290
    """Check prerequisites.
10291

10292
    This checks the pattern passed for validity by compiling it.
10293

10294
    """
10295
    try:
10296
      self.re = re.compile(self.op.pattern)
10297
    except re.error, err:
10298
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10299
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10300

    
10301
  def Exec(self, feedback_fn):
10302
    """Returns the tag list.
10303

10304
    """
10305
    cfg = self.cfg
10306
    tgts = [("/cluster", cfg.GetClusterInfo())]
10307
    ilist = cfg.GetAllInstancesInfo().values()
10308
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10309
    nlist = cfg.GetAllNodesInfo().values()
10310
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10311
    results = []
10312
    for path, target in tgts:
10313
      for tag in target.GetTags():
10314
        if self.re.search(tag):
10315
          results.append((path, tag))
10316
    return results
10317

    
10318

    
10319
class LUAddTags(TagsLU):
10320
  """Sets a tag on a given object.
10321

10322
  """
10323
  REQ_BGL = False
10324

    
10325
  def CheckPrereq(self):
10326
    """Check prerequisites.
10327

10328
    This checks the type and length of the tag name and value.
10329

10330
    """
10331
    TagsLU.CheckPrereq(self)
10332
    for tag in self.op.tags:
10333
      objects.TaggableObject.ValidateTag(tag)
10334

    
10335
  def Exec(self, feedback_fn):
10336
    """Sets the tag.
10337

10338
    """
10339
    try:
10340
      for tag in self.op.tags:
10341
        self.target.AddTag(tag)
10342
    except errors.TagError, err:
10343
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10344
    self.cfg.Update(self.target, feedback_fn)
10345

    
10346

    
10347
class LUDelTags(TagsLU):
10348
  """Delete a list of tags from a given object.
10349

10350
  """
10351
  REQ_BGL = False
10352

    
10353
  def CheckPrereq(self):
10354
    """Check prerequisites.
10355

10356
    This checks that we have the given tag.
10357

10358
    """
10359
    TagsLU.CheckPrereq(self)
10360
    for tag in self.op.tags:
10361
      objects.TaggableObject.ValidateTag(tag)
10362
    del_tags = frozenset(self.op.tags)
10363
    cur_tags = self.target.GetTags()
10364

    
10365
    diff_tags = del_tags - cur_tags
10366
    if diff_tags:
10367
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10368
      raise errors.OpPrereqError("Tag(s) %s not found" %
10369
                                 (utils.CommaJoin(diff_names), ),
10370
                                 errors.ECODE_NOENT)
10371

    
10372
  def Exec(self, feedback_fn):
10373
    """Remove the tag from the object.
10374

10375
    """
10376
    for tag in self.op.tags:
10377
      self.target.RemoveTag(tag)
10378
    self.cfg.Update(self.target, feedback_fn)
10379

    
10380

    
10381
class LUTestDelay(NoHooksLU):
10382
  """Sleep for a specified amount of time.
10383

10384
  This LU sleeps on the master and/or nodes for a specified amount of
10385
  time.
10386

10387
  """
10388
  REQ_BGL = False
10389

    
10390
  def ExpandNames(self):
10391
    """Expand names and set required locks.
10392

10393
    This expands the node list, if any.
10394

10395
    """
10396
    self.needed_locks = {}
10397
    if self.op.on_nodes:
10398
      # _GetWantedNodes can be used here, but is not always appropriate to use
10399
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10400
      # more information.
10401
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10402
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10403

    
10404
  def _TestDelay(self):
10405
    """Do the actual sleep.
10406

10407
    """
10408
    if self.op.on_master:
10409
      if not utils.TestDelay(self.op.duration):
10410
        raise errors.OpExecError("Error during master delay test")
10411
    if self.op.on_nodes:
10412
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10413
      for node, node_result in result.items():
10414
        node_result.Raise("Failure during rpc call to node %s" % node)
10415

    
10416
  def Exec(self, feedback_fn):
10417
    """Execute the test delay opcode, with the wanted repetitions.
10418

10419
    """
10420
    if self.op.repeat == 0:
10421
      self._TestDelay()
10422
    else:
10423
      top_value = self.op.repeat - 1
10424
      for i in range(self.op.repeat):
10425
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10426
        self._TestDelay()
10427

    
10428

    
10429
class LUTestJobqueue(NoHooksLU):
10430
  """Utility LU to test some aspects of the job queue.
10431

10432
  """
10433
  REQ_BGL = False
10434

    
10435
  # Must be lower than default timeout for WaitForJobChange to see whether it
10436
  # notices changed jobs
10437
  _CLIENT_CONNECT_TIMEOUT = 20.0
10438
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10439

    
10440
  @classmethod
10441
  def _NotifyUsingSocket(cls, cb, errcls):
10442
    """Opens a Unix socket and waits for another program to connect.
10443

10444
    @type cb: callable
10445
    @param cb: Callback to send socket name to client
10446
    @type errcls: class
10447
    @param errcls: Exception class to use for errors
10448

10449
    """
10450
    # Using a temporary directory as there's no easy way to create temporary
10451
    # sockets without writing a custom loop around tempfile.mktemp and
10452
    # socket.bind
10453
    tmpdir = tempfile.mkdtemp()
10454
    try:
10455
      tmpsock = utils.PathJoin(tmpdir, "sock")
10456

    
10457
      logging.debug("Creating temporary socket at %s", tmpsock)
10458
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10459
      try:
10460
        sock.bind(tmpsock)
10461
        sock.listen(1)
10462

    
10463
        # Send details to client
10464
        cb(tmpsock)
10465

    
10466
        # Wait for client to connect before continuing
10467
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10468
        try:
10469
          (conn, _) = sock.accept()
10470
        except socket.error, err:
10471
          raise errcls("Client didn't connect in time (%s)" % err)
10472
      finally:
10473
        sock.close()
10474
    finally:
10475
      # Remove as soon as client is connected
10476
      shutil.rmtree(tmpdir)
10477

    
10478
    # Wait for client to close
10479
    try:
10480
      try:
10481
        # pylint: disable-msg=E1101
10482
        # Instance of '_socketobject' has no ... member
10483
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10484
        conn.recv(1)
10485
      except socket.error, err:
10486
        raise errcls("Client failed to confirm notification (%s)" % err)
10487
    finally:
10488
      conn.close()
10489

    
10490
  def _SendNotification(self, test, arg, sockname):
10491
    """Sends a notification to the client.
10492

10493
    @type test: string
10494
    @param test: Test name
10495
    @param arg: Test argument (depends on test)
10496
    @type sockname: string
10497
    @param sockname: Socket path
10498

10499
    """
10500
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10501

    
10502
  def _Notify(self, prereq, test, arg):
10503
    """Notifies the client of a test.
10504

10505
    @type prereq: bool
10506
    @param prereq: Whether this is a prereq-phase test
10507
    @type test: string
10508
    @param test: Test name
10509
    @param arg: Test argument (depends on test)
10510

10511
    """
10512
    if prereq:
10513
      errcls = errors.OpPrereqError
10514
    else:
10515
      errcls = errors.OpExecError
10516

    
10517
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10518
                                                  test, arg),
10519
                                   errcls)
10520

    
10521
  def CheckArguments(self):
10522
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10523
    self.expandnames_calls = 0
10524

    
10525
  def ExpandNames(self):
10526
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10527
    if checkargs_calls < 1:
10528
      raise errors.ProgrammerError("CheckArguments was not called")
10529

    
10530
    self.expandnames_calls += 1
10531

    
10532
    if self.op.notify_waitlock:
10533
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10534

    
10535
    self.LogInfo("Expanding names")
10536

    
10537
    # Get lock on master node (just to get a lock, not for a particular reason)
10538
    self.needed_locks = {
10539
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10540
      }
10541

    
10542
  def Exec(self, feedback_fn):
10543
    if self.expandnames_calls < 1:
10544
      raise errors.ProgrammerError("ExpandNames was not called")
10545

    
10546
    if self.op.notify_exec:
10547
      self._Notify(False, constants.JQT_EXEC, None)
10548

    
10549
    self.LogInfo("Executing")
10550

    
10551
    if self.op.log_messages:
10552
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10553
      for idx, msg in enumerate(self.op.log_messages):
10554
        self.LogInfo("Sending log message %s", idx + 1)
10555
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10556
        # Report how many test messages have been sent
10557
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10558

    
10559
    if self.op.fail:
10560
      raise errors.OpExecError("Opcode failure was requested")
10561

    
10562
    return True
10563

    
10564

    
10565
class IAllocator(object):
10566
  """IAllocator framework.
10567

10568
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage
10576

10577
  """
10578
  # pylint: disable-msg=R0902
10579
  # lots of instance attributes
10580
  _ALLO_KEYS = [
10581
    "name", "mem_size", "disks", "disk_template",
10582
    "os", "tags", "nics", "vcpus", "hypervisor",
10583
    ]
10584
  _RELO_KEYS = [
10585
    "name", "relocate_from",
10586
    ]
10587
  _EVAC_KEYS = [
10588
    "evac_nodes",
10589
    ]
10590

    
10591
  def __init__(self, cfg, rpc, mode, **kwargs):
10592
    self.cfg = cfg
10593
    self.rpc = rpc
10594
    # init buffer variables
10595
    self.in_text = self.out_text = self.in_data = self.out_data = None
10596
    # init all input fields so that pylint is happy
10597
    self.mode = mode
10598
    self.mem_size = self.disks = self.disk_template = None
10599
    self.os = self.tags = self.nics = self.vcpus = None
10600
    self.hypervisor = None
10601
    self.relocate_from = None
10602
    self.name = None
10603
    self.evac_nodes = None
10604
    # computed fields
10605
    self.required_nodes = None
10606
    # init result fields
10607
    self.success = self.info = self.result = None
10608
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10609
      keyset = self._ALLO_KEYS
10610
      fn = self._AddNewInstance
10611
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10612
      keyset = self._RELO_KEYS
10613
      fn = self._AddRelocateInstance
10614
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10615
      keyset = self._EVAC_KEYS
10616
      fn = self._AddEvacuateNodes
10617
    else:
10618
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10619
                                   " IAllocator" % self.mode)
10620
    for key in kwargs:
10621
      if key not in keyset:
10622
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10623
                                     " IAllocator" % key)
10624
      setattr(self, key, kwargs[key])
10625

    
10626
    for key in keyset:
10627
      if key not in kwargs:
10628
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10629
                                     " IAllocator" % key)
10630
    self._BuildInputData(fn)
10631

    
10632
  def _ComputeClusterData(self):
10633
    """Compute the generic allocator input data.
10634

10635
    This is the data that is independent of the actual operation.
10636

10637
    """
10638
    cfg = self.cfg
10639
    cluster_info = cfg.GetClusterInfo()
10640
    # cluster data
10641
    data = {
10642
      "version": constants.IALLOCATOR_VERSION,
10643
      "cluster_name": cfg.GetClusterName(),
10644
      "cluster_tags": list(cluster_info.GetTags()),
10645
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10646
      # we don't have job IDs
10647
      }
10648
    iinfo = cfg.GetAllInstancesInfo().values()
10649
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10650

    
10651
    # node data
10652
    node_list = cfg.GetNodeList()
10653

    
10654
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10655
      hypervisor_name = self.hypervisor
10656
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10657
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10658
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10659
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10660

    
10661
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10662
                                        hypervisor_name)
10663
    node_iinfo = \
10664
      self.rpc.call_all_instances_info(node_list,
10665
                                       cluster_info.enabled_hypervisors)
10666

    
10667
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10668

    
10669
    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10670

    
10671
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10672

    
10673
    self.in_data = data
10674

    
10675
  @staticmethod
10676
  def _ComputeNodeGroupData(cfg):
10677
    """Compute node groups data.
10678

10679
    """
10680
    ng = {}
10681
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10682
      ng[guuid] = {
10683
        "name": gdata.name,
10684
        "alloc_policy": gdata.alloc_policy,
10685
        }
10686
    return ng
10687

    
10688
  @staticmethod
10689
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10690
    """Compute global node data.
10691

10692
    """
10693
    node_results = {}
10694
    for nname, nresult in node_data.items():
10695
      # first fill in static (config-based) values
10696
      ninfo = cfg.GetNodeInfo(nname)
10697
      pnr = {
10698
        "tags": list(ninfo.GetTags()),
10699
        "primary_ip": ninfo.primary_ip,
10700
        "secondary_ip": ninfo.secondary_ip,
10701
        "offline": ninfo.offline,
10702
        "drained": ninfo.drained,
10703
        "master_candidate": ninfo.master_candidate,
10704
        "group": ninfo.group,
10705
        "master_capable": ninfo.master_capable,
10706
        "vm_capable": ninfo.vm_capable,
10707
        }
10708

    
10709
      if not (ninfo.offline or ninfo.drained):
10710
        nresult.Raise("Can't get data for node %s" % nname)
10711
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10712
                                nname)
10713
        remote_info = nresult.payload
10714

    
10715
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10716
                     'vg_size', 'vg_free', 'cpu_total']:
10717
          if attr not in remote_info:
10718
            raise errors.OpExecError("Node '%s' didn't return attribute"
10719
                                     " '%s'" % (nname, attr))
10720
          if not isinstance(remote_info[attr], int):
10721
            raise errors.OpExecError("Node '%s' returned invalid value"
10722
                                     " for '%s': %s" %
10723
                                     (nname, attr, remote_info[attr]))
10724
        # compute memory used by primary instances
10725
        i_p_mem = i_p_up_mem = 0
10726
        for iinfo, beinfo in i_list:
10727
          if iinfo.primary_node == nname:
10728
            i_p_mem += beinfo[constants.BE_MEMORY]
10729
            if iinfo.name not in node_iinfo[nname].payload:
10730
              i_used_mem = 0
10731
            else:
10732
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10733
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10734
            remote_info['memory_free'] -= max(0, i_mem_diff)
10735

    
10736
            if iinfo.admin_up:
10737
              i_p_up_mem += beinfo[constants.BE_MEMORY]
10738

    
10739
        # compute memory used by instances
10740
        pnr_dyn = {
10741
          "total_memory": remote_info['memory_total'],
10742
          "reserved_memory": remote_info['memory_dom0'],
10743
          "free_memory": remote_info['memory_free'],
10744
          "total_disk": remote_info['vg_size'],
10745
          "free_disk": remote_info['vg_free'],
10746
          "total_cpus": remote_info['cpu_total'],
10747
          "i_pri_memory": i_p_mem,
10748
          "i_pri_up_memory": i_p_up_mem,
10749
          }
10750
        pnr.update(pnr_dyn)
10751

    
10752
      node_results[nname] = pnr
10753

    
10754
    return node_results
10755

    
10756
  @staticmethod
10757
  def _ComputeInstanceData(cluster_info, i_list):
10758
    """Compute global instance data.
10759

10760
    """
10761
    instance_data = {}
10762
    for iinfo, beinfo in i_list:
10763
      nic_data = []
10764
      for nic in iinfo.nics:
10765
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10766
        nic_dict = {"mac": nic.mac,
10767
                    "ip": nic.ip,
10768
                    "mode": filled_params[constants.NIC_MODE],
10769
                    "link": filled_params[constants.NIC_LINK],
10770
                   }
10771
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10772
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10773
        nic_data.append(nic_dict)
10774
      pir = {
10775
        "tags": list(iinfo.GetTags()),
10776
        "admin_up": iinfo.admin_up,
10777
        "vcpus": beinfo[constants.BE_VCPUS],
10778
        "memory": beinfo[constants.BE_MEMORY],
10779
        "os": iinfo.os,
10780
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10781
        "nics": nic_data,
10782
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10783
        "disk_template": iinfo.disk_template,
10784
        "hypervisor": iinfo.hypervisor,
10785
        }
10786
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10787
                                                 pir["disks"])
10788
      instance_data[iinfo.name] = pir
10789

    
10790
    return instance_data
10791

    
10792
  def _AddNewInstance(self):
10793
    """Add new instance data to allocator structure.
10794

10795
    This in combination with _AllocatorGetClusterData will create the
10796
    correct structure needed as input for the allocator.
10797

10798
    The checks for the completeness of the opcode must have already been
10799
    done.
10800

10801
    """
10802
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10803

    
10804
    if self.disk_template in constants.DTS_NET_MIRROR:
10805
      self.required_nodes = 2
10806
    else:
10807
      self.required_nodes = 1
10808
    request = {
10809
      "name": self.name,
10810
      "disk_template": self.disk_template,
10811
      "tags": self.tags,
10812
      "os": self.os,
10813
      "vcpus": self.vcpus,
10814
      "memory": self.mem_size,
10815
      "disks": self.disks,
10816
      "disk_space_total": disk_space,
10817
      "nics": self.nics,
10818
      "required_nodes": self.required_nodes,
10819
      }
10820
    return request
10821

    
10822
  def _AddRelocateInstance(self):
10823
    """Add relocate instance data to allocator structure.
10824

10825
    This in combination with _IAllocatorGetClusterData will create the
10826
    correct structure needed as input for the allocator.
10827

10828
    The checks for the completeness of the opcode must have already been
10829
    done.
10830

10831
    """
10832
    instance = self.cfg.GetInstanceInfo(self.name)
10833
    if instance is None:
10834
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
10835
                                   " IAllocator" % self.name)
10836

    
10837
    if instance.disk_template not in constants.DTS_NET_MIRROR:
10838
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10839
                                 errors.ECODE_INVAL)
10840

    
10841
    if len(instance.secondary_nodes) != 1:
10842
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
10843
                                 errors.ECODE_STATE)
10844

    
10845
    self.required_nodes = 1
10846
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
10847
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10848

    
10849
    request = {
10850
      "name": self.name,
10851
      "disk_space_total": disk_space,
10852
      "required_nodes": self.required_nodes,
10853
      "relocate_from": self.relocate_from,
10854
      }
10855
    return request
10856

    
10857
  def _AddEvacuateNodes(self):
10858
    """Add evacuate nodes data to allocator structure.
10859

10860
    """
10861
    request = {
10862
      "evac_nodes": self.evac_nodes
10863
      }
10864
    return request
10865

    
10866
  def _BuildInputData(self, fn):
10867
    """Build input data structures.
10868

10869
    """
10870
    self._ComputeClusterData()
10871

    
10872
    request = fn()
10873
    request["type"] = self.mode
10874
    self.in_data["request"] = request
10875

    
10876
    self.in_text = serializer.Dump(self.in_data)
10877

    
10878
  def Run(self, name, validate=True, call_fn=None):
10879
    """Run an instance allocator and return the results.
10880

10881
    """
10882
    if call_fn is None:
10883
      call_fn = self.rpc.call_iallocator_runner
10884

    
10885
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10886
    result.Raise("Failure while running the iallocator script")
10887

    
10888
    self.out_text = result.payload
10889
    if validate:
10890
      self._ValidateResult()
10891

    
10892
  def _ValidateResult(self):
10893
    """Process the allocator results.
10894

10895
    This will process and if successful save the result in
10896
    self.out_data and the other parameters.
10897

10898
    """
10899
    try:
10900
      rdict = serializer.Load(self.out_text)
10901
    except Exception, err:
10902
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10903

    
10904
    if not isinstance(rdict, dict):
10905
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
10906

    
10907
    # TODO: remove backwards compatibility in later versions
10908
    if "nodes" in rdict and "result" not in rdict:
10909
      rdict["result"] = rdict["nodes"]
10910
      del rdict["nodes"]
10911

    
10912
    for key in "success", "info", "result":
10913
      if key not in rdict:
10914
        raise errors.OpExecError("Can't parse iallocator results:"
10915
                                 " missing key '%s'" % key)
10916
      setattr(self, key, rdict[key])
10917

    
10918
    if not isinstance(rdict["result"], list):
10919
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10920
                               " is not a list")
10921
    self.out_data = rdict
10922

    
10923

    
10924
class LUTestAllocator(NoHooksLU):
10925
  """Run allocator tests.
10926

10927
  This LU runs the allocator tests
10928

10929
  """
10930
  def CheckPrereq(self):
10931
    """Check prerequisites.
10932

10933
    This checks the opcode parameters depending on the director and mode test.
10934

10935
    """
10936
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10937
      for attr in ["mem_size", "disks", "disk_template",
10938
                   "os", "tags", "nics", "vcpus"]:
10939
        if not hasattr(self.op, attr):
10940
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10941
                                     attr, errors.ECODE_INVAL)
10942
      iname = self.cfg.ExpandInstanceName(self.op.name)
10943
      if iname is not None:
10944
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10945
                                   iname, errors.ECODE_EXISTS)
10946
      if not isinstance(self.op.nics, list):
10947
        raise errors.OpPrereqError("Invalid parameter 'nics'",
10948
                                   errors.ECODE_INVAL)
10949
      if not isinstance(self.op.disks, list):
10950
        raise errors.OpPrereqError("Invalid parameter 'disks'",
10951
                                   errors.ECODE_INVAL)
10952
      for row in self.op.disks:
10953
        if (not isinstance(row, dict) or
10954
            "size" not in row or
10955
            not isinstance(row["size"], int) or
10956
            "mode" not in row or
10957
            row["mode"] not in ['r', 'w']):
10958
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
10959
                                     " parameter", errors.ECODE_INVAL)
10960
      if self.op.hypervisor is None:
10961
        self.op.hypervisor = self.cfg.GetHypervisorType()
10962
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10963
      fname = _ExpandInstanceName(self.cfg, self.op.name)
10964
      self.op.name = fname
10965
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10966
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10967
      if not hasattr(self.op, "evac_nodes"):
10968
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10969
                                   " opcode input", errors.ECODE_INVAL)
10970
    else:
10971
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10972
                                 self.op.mode, errors.ECODE_INVAL)
10973

    
10974
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10975
      if self.op.allocator is None:
10976
        raise errors.OpPrereqError("Missing allocator name",
10977
                                   errors.ECODE_INVAL)
10978
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10979
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
10980
                                 self.op.direction, errors.ECODE_INVAL)
10981

    
10982
  def Exec(self, feedback_fn):
10983
    """Run the allocator test.
10984

10985
    """
10986
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10987
      ial = IAllocator(self.cfg, self.rpc,
10988
                       mode=self.op.mode,
10989
                       name=self.op.name,
10990
                       mem_size=self.op.mem_size,
10991
                       disks=self.op.disks,
10992
                       disk_template=self.op.disk_template,
10993
                       os=self.op.os,
10994
                       tags=self.op.tags,
10995
                       nics=self.op.nics,
10996
                       vcpus=self.op.vcpus,
10997
                       hypervisor=self.op.hypervisor,
10998
                       )
10999
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11000
      ial = IAllocator(self.cfg, self.rpc,
11001
                       mode=self.op.mode,
11002
                       name=self.op.name,
11003
                       relocate_from=list(self.relocate_from),
11004
                       )
11005
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11006
      ial = IAllocator(self.cfg, self.rpc,
11007
                       mode=self.op.mode,
11008
                       evac_nodes=self.op.evac_nodes)
11009
    else:
11010
      raise errors.ProgrammerError("Unhandled mode %s in"
11011
                                   " LUTestAllocator.Exec", self.op.mode)
11012

    
11013
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
11014
      result = ial.in_text
11015
    else:
11016
      ial.Run(self.op.allocator, validate=False)
11017
      result = ial.out_text
11018
    return result
11019

    
11020

    
11021
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
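# Illustrative usage sketch (not in the original source): callers look up the
# query class for a resource and instantiate it with the requested names and
# fields, mirroring the direct _GroupQuery call in LUQueryGroups.CheckArguments
# above, e.g.:
#
#   impl = _GetQueryImplementation(constants.QR_GROUP)
#   gq = impl(op.names, op.output_fields, False)
#
# An unknown resource name surfaces as OpPrereqError instead of a bare
# KeyError, so it can be reported as a normal prerequisite failure.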