Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 71910715

History | View | Annotate | Download (392.2 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay to many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60

    
61
import ganeti.masterd.instance # pylint: disable-msg=W0611
62

    
63

    
64
def _SupportsOob(cfg, node):
65
  """Tells if node supports OOB.
66

67
  @type cfg: L{config.ConfigWriter}
68
  @param cfg: The cluster configuration
69
  @type node: L{objects.Node}
70
  @param node: The node
71
  @return: The OOB script if supported or an empty string otherwise
72

73
  """
74
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
75

    
76

    
77
# End types
78
class LogicalUnit(object):
79
  """Logical Unit base class.
80

81
  Subclasses must follow these rules:
82
    - implement ExpandNames
83
    - implement CheckPrereq (except when tasklets are used)
84
    - implement Exec (except when tasklets are used)
85
    - implement BuildHooksEnv
86
    - redefine HPATH and HTYPE
87
    - optionally redefine their run requirements:
88
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
89

90
  Note that all commands require root permissions.
91

92
  @ivar dry_run_result: the value (if any) that will be returned to the caller
93
      in dry-run mode (signalled by opcode dry_run parameter)
94

95
  """
96
  HPATH = None
97
  HTYPE = None
98
  REQ_BGL = True
99

    
100
  def __init__(self, processor, op, context, rpc):
101
    """Constructor for LogicalUnit.
102

103
    This needs to be overridden in derived classes in order to check op
104
    validity.
105

106
    """
107
    self.proc = processor
108
    self.op = op
109
    self.cfg = context.cfg
110
    self.context = context
111
    self.rpc = rpc
112
    # Dicts used to declare locking needs to mcpu
113
    self.needed_locks = None
114
    self.acquired_locks = {}
115
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
116
    self.add_locks = {}
117
    self.remove_locks = {}
118
    # Used to force good behavior when calling helper functions
119
    self.recalculate_locks = {}
120
    self.__ssh = None
121
    # logging
122
    self.Log = processor.Log # pylint: disable-msg=C0103
123
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
124
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
125
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
126
    # support for dry-run
127
    self.dry_run_result = None
128
    # support for generic debug attribute
129
    if (not hasattr(self.op, "debug_level") or
130
        not isinstance(self.op.debug_level, int)):
131
      self.op.debug_level = 0
132

    
133
    # Tasklets
134
    self.tasklets = None
135

    
136
    # Validate opcode parameters and set defaults
137
    self.op.Validate(True)
138

    
139
    self.CheckArguments()
140

    
141
  def __GetSSH(self):
142
    """Returns the SshRunner object
143

144
    """
145
    if not self.__ssh:
146
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
147
    return self.__ssh
148

    
149
  ssh = property(fget=__GetSSH)
150

    
151
  def CheckArguments(self):
152
    """Check syntactic validity for the opcode arguments.
153

154
    This method is for doing a simple syntactic check and ensure
155
    validity of opcode parameters, without any cluster-related
156
    checks. While the same can be accomplished in ExpandNames and/or
157
    CheckPrereq, doing these separate is better because:
158

159
      - ExpandNames is left as as purely a lock-related function
160
      - CheckPrereq is run after we have acquired locks (and possible
161
        waited for them)
162

163
    The function is allowed to change the self.op attribute so that
164
    later methods can no longer worry about missing parameters.
165

166
    """
167
    pass
168

    
169
  def ExpandNames(self):
170
    """Expand names for this LU.
171

172
    This method is called before starting to execute the opcode, and it should
173
    update all the parameters of the opcode to their canonical form (e.g. a
174
    short node name must be fully expanded after this method has successfully
175
    completed). This way locking, hooks, logging, etc. can work correctly.
176

177
    LUs which implement this method must also populate the self.needed_locks
178
    member, as a dict with lock levels as keys, and a list of needed lock names
179
    as values. Rules:
180

181
      - use an empty dict if you don't need any lock
182
      - if you don't need any lock at a particular level omit that level
183
      - don't put anything for the BGL level
184
      - if you want all locks at a level use locking.ALL_SET as a value
185

186
    If you need to share locks (rather than acquire them exclusively) at one
187
    level you can modify self.share_locks, setting a true value (usually 1) for
188
    that level. By default locks are not shared.
189

190
    This function can also define a list of tasklets, which then will be
191
    executed in order instead of the usual LU-level CheckPrereq and Exec
192
    functions, if those are not defined by the LU.
193

194
    Examples::
195

196
      # Acquire all nodes and one instance
197
      self.needed_locks = {
198
        locking.LEVEL_NODE: locking.ALL_SET,
199
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
200
      }
201
      # Acquire just two nodes
202
      self.needed_locks = {
203
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
204
      }
205
      # Acquire no locks
206
      self.needed_locks = {} # No, you can't leave it to the default value None
207

208
    """
209
    # The implementation of this method is mandatory only if the new LU is
210
    # concurrent, so that old LUs don't need to be changed all at the same
211
    # time.
212
    if self.REQ_BGL:
213
      self.needed_locks = {} # Exclusive LUs don't need locks.
214
    else:
215
      raise NotImplementedError
216

    
217
  def DeclareLocks(self, level):
218
    """Declare LU locking needs for a level
219

220
    While most LUs can just declare their locking needs at ExpandNames time,
221
    sometimes there's the need to calculate some locks after having acquired
222
    the ones before. This function is called just before acquiring locks at a
223
    particular level, but after acquiring the ones at lower levels, and permits
224
    such calculations. It can be used to modify self.needed_locks, and by
225
    default it does nothing.
226

227
    This function is only called if you have something already set in
228
    self.needed_locks for the level.
229

230
    @param level: Locking level which is going to be locked
231
    @type level: member of ganeti.locking.LEVELS
232

233
    """
234

    
235
  def CheckPrereq(self):
236
    """Check prerequisites for this LU.
237

238
    This method should check that the prerequisites for the execution
239
    of this LU are fulfilled. It can do internode communication, but
240
    it should be idempotent - no cluster or system changes are
241
    allowed.
242

243
    The method should raise errors.OpPrereqError in case something is
244
    not fulfilled. Its return value is ignored.
245

246
    This method should also update all the parameters of the opcode to
247
    their canonical form if it hasn't been done by ExpandNames before.
248

249
    """
250
    if self.tasklets is not None:
251
      for (idx, tl) in enumerate(self.tasklets):
252
        logging.debug("Checking prerequisites for tasklet %s/%s",
253
                      idx + 1, len(self.tasklets))
254
        tl.CheckPrereq()
255
    else:
256
      pass
257

    
258
  def Exec(self, feedback_fn):
259
    """Execute the LU.
260

261
    This method should implement the actual work. It should raise
262
    errors.OpExecError for failures that are somewhat dealt with in
263
    code, or expected.
264

265
    """
266
    if self.tasklets is not None:
267
      for (idx, tl) in enumerate(self.tasklets):
268
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
269
        tl.Exec(feedback_fn)
270
    else:
271
      raise NotImplementedError
272

    
273
  def BuildHooksEnv(self):
274
    """Build hooks environment for this LU.
275

276
    This method should return a three-node tuple consisting of: a dict
277
    containing the environment that will be used for running the
278
    specific hook for this LU, a list of node names on which the hook
279
    should run before the execution, and a list of node names on which
280
    the hook should run after the execution.
281

282
    The keys of the dict must not have 'GANETI_' prefixed as this will
283
    be handled in the hooks runner. Also note additional keys will be
284
    added by the hooks runner. If the LU doesn't define any
285
    environment, an empty dict (and not None) should be returned.
286

287
    No nodes should be returned as an empty list (and not None).
288

289
    Note that if the HPATH for a LU class is None, this function will
290
    not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
296
    """Notify the LU about the results of its hooks.
297

298
    This method is called every time a hooks phase is executed, and notifies
299
    the Logical Unit about the hooks' result. The LU can then use it to alter
300
    its result based on the hooks.  By default the method does nothing and the
301
    previous result is passed back unchanged but any LU can define it if it
302
    wants to use the local cluster hook-scripts somehow.
303

304
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
305
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
306
    @param hook_results: the results of the multi-node hooks rpc call
307
    @param feedback_fn: function used send feedback back to the caller
308
    @param lu_result: the previous Exec result this LU had, or None
309
        in the PRE phase
310
    @return: the new Exec result, based on the previous result
311
        and hook results
312

313
    """
314
    # API must be kept, thus we ignore the unused argument and could
315
    # be a function warnings
316
    # pylint: disable-msg=W0613,R0201
317
    return lu_result
318

    
319
  def _ExpandAndLockInstance(self):
320
    """Helper function to expand and lock an instance.
321

322
    Many LUs that work on an instance take its name in self.op.instance_name
323
    and need to expand it and then declare the expanded name for locking. This
324
    function does it, and then updates self.op.instance_name to the expanded
325
    name. It also initializes needed_locks as a dict, if this hasn't been done
326
    before.
327

328
    """
329
    if self.needed_locks is None:
330
      self.needed_locks = {}
331
    else:
332
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
333
        "_ExpandAndLockInstance called with instance-level locks set"
334
    self.op.instance_name = _ExpandInstanceName(self.cfg,
335
                                                self.op.instance_name)
336
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
337

    
338
  def _LockInstancesNodes(self, primary_only=False):
339
    """Helper function to declare instances' nodes for locking.
340

341
    This function should be called after locking one or more instances to lock
342
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
343
    with all primary or secondary nodes for instances already locked and
344
    present in self.needed_locks[locking.LEVEL_INSTANCE].
345

346
    It should be called from DeclareLocks, and for safety only works if
347
    self.recalculate_locks[locking.LEVEL_NODE] is set.
348

349
    In the future it may grow parameters to just lock some instance's nodes, or
350
    to just lock primaries or secondary nodes, if needed.
351

352
    If should be called in DeclareLocks in a way similar to::
353

354
      if level == locking.LEVEL_NODE:
355
        self._LockInstancesNodes()
356

357
    @type primary_only: boolean
358
    @param primary_only: only lock primary nodes of locked instances
359

360
    """
361
    assert locking.LEVEL_NODE in self.recalculate_locks, \
362
      "_LockInstancesNodes helper function called with no nodes to recalculate"
363

    
364
    # TODO: check if we're really been called with the instance locks held
365

    
366
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
367
    # future we might want to have different behaviors depending on the value
368
    # of self.recalculate_locks[locking.LEVEL_NODE]
369
    wanted_nodes = []
370
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
371
      instance = self.context.cfg.GetInstanceInfo(instance_name)
372
      wanted_nodes.append(instance.primary_node)
373
      if not primary_only:
374
        wanted_nodes.extend(instance.secondary_nodes)
375

    
376
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
377
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
378
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
379
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
380

    
381
    del self.recalculate_locks[locking.LEVEL_NODE]
382

    
383

    
384
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
385
  """Simple LU which runs no hooks.
386

387
  This LU is intended as a parent for other LogicalUnits which will
388
  run no hooks, in order to reduce duplicate code.
389

390
  """
391
  HPATH = None
392
  HTYPE = None
393

    
394
  def BuildHooksEnv(self):
395
    """Empty BuildHooksEnv for NoHooksLu.
396

397
    This just raises an error.
398

399
    """
400
    assert False, "BuildHooksEnv called for NoHooksLUs"
401

    
402

    
403
class Tasklet:
404
  """Tasklet base class.
405

406
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
407
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
408
  tasklets know nothing about locks.
409

410
  Subclasses must follow these rules:
411
    - Implement CheckPrereq
412
    - Implement Exec
413

414
  """
415
  def __init__(self, lu):
416
    self.lu = lu
417

    
418
    # Shortcuts
419
    self.cfg = lu.cfg
420
    self.rpc = lu.rpc
421

    
422
  def CheckPrereq(self):
423
    """Check prerequisites for this tasklets.
424

425
    This method should check whether the prerequisites for the execution of
426
    this tasklet are fulfilled. It can do internode communication, but it
427
    should be idempotent - no cluster or system changes are allowed.
428

429
    The method should raise errors.OpPrereqError in case something is not
430
    fulfilled. Its return value is ignored.
431

432
    This method should also update all parameters to their canonical form if it
433
    hasn't been done before.
434

435
    """
436
    pass
437

    
438
  def Exec(self, feedback_fn):
439
    """Execute the tasklet.
440

441
    This method should implement the actual work. It should raise
442
    errors.OpExecError for failures that are somewhat dealt with in code, or
443
    expected.
444

445
    """
446
    raise NotImplementedError
447

    
448

    
449
class _QueryBase:
450
  """Base for query utility classes.
451

452
  """
453
  #: Attribute holding field definitions
454
  FIELDS = None
455

    
456
  def __init__(self, names, fields, use_locking):
457
    """Initializes this class.
458

459
    """
460
    self.names = names
461
    self.use_locking = use_locking
462

    
463
    self.query = query.Query(self.FIELDS, fields)
464
    self.requested_data = self.query.RequestedData()
465

    
466
    self.do_locking = None
467
    self.wanted = None
468

    
469
  def _GetNames(self, lu, all_names, lock_level):
470
    """Helper function to determine names asked for in the query.
471

472
    """
473
    if self.do_locking:
474
      names = lu.acquired_locks[lock_level]
475
    else:
476
      names = all_names
477

    
478
    if self.wanted == locking.ALL_SET:
479
      assert not self.names
480
      # caller didn't specify names, so ordering is not important
481
      return utils.NiceSort(names)
482

    
483
    # caller specified names and we must keep the same order
484
    assert self.names
485
    assert not self.do_locking or lu.acquired_locks[lock_level]
486

    
487
    missing = set(self.wanted).difference(names)
488
    if missing:
489
      raise errors.OpExecError("Some items were removed before retrieving"
490
                               " their data: %s" % missing)
491

    
492
    # Return expanded names
493
    return self.wanted
494

    
495
  @classmethod
496
  def FieldsQuery(cls, fields):
497
    """Returns list of available fields.
498

499
    @return: List of L{objects.QueryFieldDefinition}
500

501
    """
502
    return query.QueryFields(cls.FIELDS, fields)
503

    
504
  def ExpandNames(self, lu):
505
    """Expand names for this query.
506

507
    See L{LogicalUnit.ExpandNames}.
508

509
    """
510
    raise NotImplementedError()
511

    
512
  def DeclareLocks(self, lu, level):
513
    """Declare locks for this query.
514

515
    See L{LogicalUnit.DeclareLocks}.
516

517
    """
518
    raise NotImplementedError()
519

    
520
  def _GetQueryData(self, lu):
521
    """Collects all data for this query.
522

523
    @return: Query data object
524

525
    """
526
    raise NotImplementedError()
527

    
528
  def NewStyleQuery(self, lu):
529
    """Collect data and execute query.
530

531
    """
532
    return query.GetQueryResponse(self.query, self._GetQueryData(lu))
533

    
534
  def OldStyleQuery(self, lu):
535
    """Collect data and execute query.
536

537
    """
538
    return self.query.OldStyleQuery(self._GetQueryData(lu))
539

    
540

    
541
def _GetWantedNodes(lu, nodes):
542
  """Returns list of checked and expanded node names.
543

544
  @type lu: L{LogicalUnit}
545
  @param lu: the logical unit on whose behalf we execute
546
  @type nodes: list
547
  @param nodes: list of node names or None for all nodes
548
  @rtype: list
549
  @return: the list of nodes, sorted
550
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
551

552
  """
553
  if nodes:
554
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
555

    
556
  return utils.NiceSort(lu.cfg.GetNodeList())
557

    
558

    
559
def _GetWantedInstances(lu, instances):
560
  """Returns list of checked and expanded instance names.
561

562
  @type lu: L{LogicalUnit}
563
  @param lu: the logical unit on whose behalf we execute
564
  @type instances: list
565
  @param instances: list of instance names or None for all instances
566
  @rtype: list
567
  @return: the list of instances, sorted
568
  @raise errors.OpPrereqError: if the instances parameter is wrong type
569
  @raise errors.OpPrereqError: if any of the passed instances is not found
570

571
  """
572
  if instances:
573
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
574
  else:
575
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
576
  return wanted
577

    
578

    
579
def _GetUpdatedParams(old_params, update_dict,
580
                      use_default=True, use_none=False):
581
  """Return the new version of a parameter dictionary.
582

583
  @type old_params: dict
584
  @param old_params: old parameters
585
  @type update_dict: dict
586
  @param update_dict: dict containing new parameter values, or
587
      constants.VALUE_DEFAULT to reset the parameter to its default
588
      value
589
  @param use_default: boolean
590
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
591
      values as 'to be deleted' values
592
  @param use_none: boolean
593
  @type use_none: whether to recognise C{None} values as 'to be
594
      deleted' values
595
  @rtype: dict
596
  @return: the new parameter dictionary
597

598
  """
599
  params_copy = copy.deepcopy(old_params)
600
  for key, val in update_dict.iteritems():
601
    if ((use_default and val == constants.VALUE_DEFAULT) or
602
        (use_none and val is None)):
603
      try:
604
        del params_copy[key]
605
      except KeyError:
606
        pass
607
    else:
608
      params_copy[key] = val
609
  return params_copy
610

    
611

    
612
def _CheckOutputFields(static, dynamic, selected):
613
  """Checks whether all selected fields are valid.
614

615
  @type static: L{utils.FieldSet}
616
  @param static: static fields set
617
  @type dynamic: L{utils.FieldSet}
618
  @param dynamic: dynamic fields set
619

620
  """
621
  f = utils.FieldSet()
622
  f.Extend(static)
623
  f.Extend(dynamic)
624

    
625
  delta = f.NonMatching(selected)
626
  if delta:
627
    raise errors.OpPrereqError("Unknown output fields selected: %s"
628
                               % ",".join(delta), errors.ECODE_INVAL)
629

    
630

    
631
def _CheckGlobalHvParams(params):
632
  """Validates that given hypervisor params are not global ones.
633

634
  This will ensure that instances don't get customised versions of
635
  global params.
636

637
  """
638
  used_globals = constants.HVC_GLOBALS.intersection(params)
639
  if used_globals:
640
    msg = ("The following hypervisor parameters are global and cannot"
641
           " be customized at instance level, please modify them at"
642
           " cluster level: %s" % utils.CommaJoin(used_globals))
643
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
644

    
645

    
646
def _CheckNodeOnline(lu, node, msg=None):
647
  """Ensure that a given node is online.
648

649
  @param lu: the LU on behalf of which we make the check
650
  @param node: the node to check
651
  @param msg: if passed, should be a message to replace the default one
652
  @raise errors.OpPrereqError: if the node is offline
653

654
  """
655
  if msg is None:
656
    msg = "Can't use offline node"
657
  if lu.cfg.GetNodeInfo(node).offline:
658
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
659

    
660

    
661
def _CheckNodeNotDrained(lu, node):
662
  """Ensure that a given node is not drained.
663

664
  @param lu: the LU on behalf of which we make the check
665
  @param node: the node to check
666
  @raise errors.OpPrereqError: if the node is drained
667

668
  """
669
  if lu.cfg.GetNodeInfo(node).drained:
670
    raise errors.OpPrereqError("Can't use drained node %s" % node,
671
                               errors.ECODE_STATE)
672

    
673

    
674
def _CheckNodeVmCapable(lu, node):
675
  """Ensure that a given node is vm capable.
676

677
  @param lu: the LU on behalf of which we make the check
678
  @param node: the node to check
679
  @raise errors.OpPrereqError: if the node is not vm capable
680

681
  """
682
  if not lu.cfg.GetNodeInfo(node).vm_capable:
683
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
684
                               errors.ECODE_STATE)
685

    
686

    
687
def _CheckNodeHasOS(lu, node, os_name, force_variant):
688
  """Ensure that a node supports a given OS.
689

690
  @param lu: the LU on behalf of which we make the check
691
  @param node: the node to check
692
  @param os_name: the OS to query about
693
  @param force_variant: whether to ignore variant errors
694
  @raise errors.OpPrereqError: if the node is not supporting the OS
695

696
  """
697
  result = lu.rpc.call_os_get(node, os_name)
698
  result.Raise("OS '%s' not in supported OS list for node %s" %
699
               (os_name, node),
700
               prereq=True, ecode=errors.ECODE_INVAL)
701
  if not force_variant:
702
    _CheckOSVariant(result.payload, os_name)
703

    
704

    
705
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
706
  """Ensure that a node has the given secondary ip.
707

708
  @type lu: L{LogicalUnit}
709
  @param lu: the LU on behalf of which we make the check
710
  @type node: string
711
  @param node: the node to check
712
  @type secondary_ip: string
713
  @param secondary_ip: the ip to check
714
  @type prereq: boolean
715
  @param prereq: whether to throw a prerequisite or an execute error
716
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
717
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
718

719
  """
720
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
721
  result.Raise("Failure checking secondary ip on node %s" % node,
722
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
723
  if not result.payload:
724
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
725
           " please fix and re-run this command" % secondary_ip)
726
    if prereq:
727
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
728
    else:
729
      raise errors.OpExecError(msg)
730

    
731

    
732
def _GetClusterDomainSecret():
733
  """Reads the cluster domain secret.
734

735
  """
736
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
737
                               strict=True)
738

    
739

    
740
def _CheckInstanceDown(lu, instance, reason):
741
  """Ensure that an instance is not running."""
742
  if instance.admin_up:
743
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
744
                               (instance.name, reason), errors.ECODE_STATE)
745

    
746
  pnode = instance.primary_node
747
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
748
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
749
              prereq=True, ecode=errors.ECODE_ENVIRON)
750

    
751
  if instance.name in ins_l.payload:
752
    raise errors.OpPrereqError("Instance %s is running, %s" %
753
                               (instance.name, reason), errors.ECODE_STATE)
754

    
755

    
756
def _ExpandItemName(fn, name, kind):
757
  """Expand an item name.
758

759
  @param fn: the function to use for expansion
760
  @param name: requested item name
761
  @param kind: text description ('Node' or 'Instance')
762
  @return: the resolved (full) name
763
  @raise errors.OpPrereqError: if the item is not found
764

765
  """
766
  full_name = fn(name)
767
  if full_name is None:
768
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
769
                               errors.ECODE_NOENT)
770
  return full_name
771

    
772

    
773
def _ExpandNodeName(cfg, name):
774
  """Wrapper over L{_ExpandItemName} for nodes."""
775
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
776

    
777

    
778
def _ExpandInstanceName(cfg, name):
779
  """Wrapper over L{_ExpandItemName} for instance."""
780
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
781

    
782

    
783
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
784
                          memory, vcpus, nics, disk_template, disks,
785
                          bep, hvp, hypervisor_name):
786
  """Builds instance related env variables for hooks
787

788
  This builds the hook environment from individual variables.
789

790
  @type name: string
791
  @param name: the name of the instance
792
  @type primary_node: string
793
  @param primary_node: the name of the instance's primary node
794
  @type secondary_nodes: list
795
  @param secondary_nodes: list of secondary nodes as strings
796
  @type os_type: string
797
  @param os_type: the name of the instance's OS
798
  @type status: boolean
799
  @param status: the should_run status of the instance
800
  @type memory: string
801
  @param memory: the memory size of the instance
802
  @type vcpus: string
803
  @param vcpus: the count of VCPUs the instance has
804
  @type nics: list
805
  @param nics: list of tuples (ip, mac, mode, link) representing
806
      the NICs the instance has
807
  @type disk_template: string
808
  @param disk_template: the disk template of the instance
809
  @type disks: list
810
  @param disks: the list of (size, mode) pairs
811
  @type bep: dict
812
  @param bep: the backend parameters for the instance
813
  @type hvp: dict
814
  @param hvp: the hypervisor parameters for the instance
815
  @type hypervisor_name: string
816
  @param hypervisor_name: the hypervisor for the instance
817
  @rtype: dict
818
  @return: the hook environment for this instance
819

820
  """
821
  if status:
822
    str_status = "up"
823
  else:
824
    str_status = "down"
825
  env = {
826
    "OP_TARGET": name,
827
    "INSTANCE_NAME": name,
828
    "INSTANCE_PRIMARY": primary_node,
829
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
830
    "INSTANCE_OS_TYPE": os_type,
831
    "INSTANCE_STATUS": str_status,
832
    "INSTANCE_MEMORY": memory,
833
    "INSTANCE_VCPUS": vcpus,
834
    "INSTANCE_DISK_TEMPLATE": disk_template,
835
    "INSTANCE_HYPERVISOR": hypervisor_name,
836
  }
837

    
838
  if nics:
839
    nic_count = len(nics)
840
    for idx, (ip, mac, mode, link) in enumerate(nics):
841
      if ip is None:
842
        ip = ""
843
      env["INSTANCE_NIC%d_IP" % idx] = ip
844
      env["INSTANCE_NIC%d_MAC" % idx] = mac
845
      env["INSTANCE_NIC%d_MODE" % idx] = mode
846
      env["INSTANCE_NIC%d_LINK" % idx] = link
847
      if mode == constants.NIC_MODE_BRIDGED:
848
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
849
  else:
850
    nic_count = 0
851

    
852
  env["INSTANCE_NIC_COUNT"] = nic_count
853

    
854
  if disks:
855
    disk_count = len(disks)
856
    for idx, (size, mode) in enumerate(disks):
857
      env["INSTANCE_DISK%d_SIZE" % idx] = size
858
      env["INSTANCE_DISK%d_MODE" % idx] = mode
859
  else:
860
    disk_count = 0
861

    
862
  env["INSTANCE_DISK_COUNT"] = disk_count
863

    
864
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
865
    for key, value in source.items():
866
      env["INSTANCE_%s_%s" % (kind, key)] = value
867

    
868
  return env
869

    
870

    
871
def _NICListToTuple(lu, nics):
872
  """Build a list of nic information tuples.
873

874
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
875
  value in LUQueryInstanceData.
876

877
  @type lu:  L{LogicalUnit}
878
  @param lu: the logical unit on whose behalf we execute
879
  @type nics: list of L{objects.NIC}
880
  @param nics: list of nics to convert to hooks tuples
881

882
  """
883
  hooks_nics = []
884
  cluster = lu.cfg.GetClusterInfo()
885
  for nic in nics:
886
    ip = nic.ip
887
    mac = nic.mac
888
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
889
    mode = filled_params[constants.NIC_MODE]
890
    link = filled_params[constants.NIC_LINK]
891
    hooks_nics.append((ip, mac, mode, link))
892
  return hooks_nics
893

    
894

    
895
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
896
  """Builds instance related env variables for hooks from an object.
897

898
  @type lu: L{LogicalUnit}
899
  @param lu: the logical unit on whose behalf we execute
900
  @type instance: L{objects.Instance}
901
  @param instance: the instance for which we should build the
902
      environment
903
  @type override: dict
904
  @param override: dictionary with key/values that will override
905
      our values
906
  @rtype: dict
907
  @return: the hook environment dictionary
908

909
  """
910
  cluster = lu.cfg.GetClusterInfo()
911
  bep = cluster.FillBE(instance)
912
  hvp = cluster.FillHV(instance)
913
  args = {
914
    'name': instance.name,
915
    'primary_node': instance.primary_node,
916
    'secondary_nodes': instance.secondary_nodes,
917
    'os_type': instance.os,
918
    'status': instance.admin_up,
919
    'memory': bep[constants.BE_MEMORY],
920
    'vcpus': bep[constants.BE_VCPUS],
921
    'nics': _NICListToTuple(lu, instance.nics),
922
    'disk_template': instance.disk_template,
923
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
924
    'bep': bep,
925
    'hvp': hvp,
926
    'hypervisor_name': instance.hypervisor,
927
  }
928
  if override:
929
    args.update(override)
930
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
931

    
932

    
933
def _AdjustCandidatePool(lu, exceptions):
934
  """Adjust the candidate pool after node operations.
935

936
  """
937
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
938
  if mod_list:
939
    lu.LogInfo("Promoted nodes to master candidate role: %s",
940
               utils.CommaJoin(node.name for node in mod_list))
941
    for name in mod_list:
942
      lu.context.ReaddNode(name)
943
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
944
  if mc_now > mc_max:
945
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
946
               (mc_now, mc_max))
947

    
948

    
949
def _DecideSelfPromotion(lu, exceptions=None):
950
  """Decide whether I should promote myself as a master candidate.
951

952
  """
953
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
954
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
955
  # the new node will increase mc_max with one, so:
956
  mc_should = min(mc_should + 1, cp_size)
957
  return mc_now < mc_should
958

    
959

    
960
def _CheckNicsBridgesExist(lu, target_nics, target_node):
961
  """Check that the brigdes needed by a list of nics exist.
962

963
  """
964
  cluster = lu.cfg.GetClusterInfo()
965
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
966
  brlist = [params[constants.NIC_LINK] for params in paramslist
967
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
968
  if brlist:
969
    result = lu.rpc.call_bridges_exist(target_node, brlist)
970
    result.Raise("Error checking bridges on destination node '%s'" %
971
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
972

    
973

    
974
def _CheckInstanceBridgesExist(lu, instance, node=None):
975
  """Check that the brigdes needed by an instance exist.
976

977
  """
978
  if node is None:
979
    node = instance.primary_node
980
  _CheckNicsBridgesExist(lu, instance.nics, node)
981

    
982

    
983
def _CheckOSVariant(os_obj, name):
984
  """Check whether an OS name conforms to the os variants specification.
985

986
  @type os_obj: L{objects.OS}
987
  @param os_obj: OS object to check
988
  @type name: string
989
  @param name: OS name passed by the user, to check for validity
990

991
  """
992
  if not os_obj.supported_variants:
993
    return
994
  variant = objects.OS.GetVariant(name)
995
  if not variant:
996
    raise errors.OpPrereqError("OS name must include a variant",
997
                               errors.ECODE_INVAL)
998

    
999
  if variant not in os_obj.supported_variants:
1000
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001

    
1002

    
1003
def _GetNodeInstancesInner(cfg, fn):
1004
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005

    
1006

    
1007
def _GetNodeInstances(cfg, node_name):
1008
  """Returns a list of all primary and secondary instances on a node.
1009

1010
  """
1011

    
1012
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1013

    
1014

    
1015
def _GetNodePrimaryInstances(cfg, node_name):
1016
  """Returns primary instances on a node.
1017

1018
  """
1019
  return _GetNodeInstancesInner(cfg,
1020
                                lambda inst: node_name == inst.primary_node)
1021

    
1022

    
1023
def _GetNodeSecondaryInstances(cfg, node_name):
1024
  """Returns secondary instances on a node.
1025

1026
  """
1027
  return _GetNodeInstancesInner(cfg,
1028
                                lambda inst: node_name in inst.secondary_nodes)
1029

    
1030

    
1031
def _GetStorageTypeArgs(cfg, storage_type):
1032
  """Returns the arguments for a storage type.
1033

1034
  """
1035
  # Special case for file storage
1036
  if storage_type == constants.ST_FILE:
1037
    # storage.FileStorage wants a list of storage directories
1038
    return [[cfg.GetFileStorageDir()]]
1039

    
1040
  return []
1041

    
1042

    
1043
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1044
  faulty = []
1045

    
1046
  for dev in instance.disks:
1047
    cfg.SetDiskID(dev, node_name)
1048

    
1049
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1050
  result.Raise("Failed to get disk status from node %s" % node_name,
1051
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1052

    
1053
  for idx, bdev_status in enumerate(result.payload):
1054
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1055
      faulty.append(idx)
1056

    
1057
  return faulty
1058

    
1059

    
1060
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061
  """Check the sanity of iallocator and node arguments and use the
1062
  cluster-wide iallocator if appropriate.
1063

1064
  Check that at most one of (iallocator, node) is specified. If none is
1065
  specified, then the LU's opcode's iallocator slot is filled with the
1066
  cluster-wide default iallocator.
1067

1068
  @type iallocator_slot: string
1069
  @param iallocator_slot: the name of the opcode iallocator slot
1070
  @type node_slot: string
1071
  @param node_slot: the name of the opcode target node slot
1072

1073
  """
1074
  node = getattr(lu.op, node_slot, None)
1075
  iallocator = getattr(lu.op, iallocator_slot, None)
1076

    
1077
  if node is not None and iallocator is not None:
1078
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1079
                               errors.ECODE_INVAL)
1080
  elif node is None and iallocator is None:
1081
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1082
    if default_iallocator:
1083
      setattr(lu.op, iallocator_slot, default_iallocator)
1084
    else:
1085
      raise errors.OpPrereqError("No iallocator or node given and no"
1086
                                 " cluster-wide default iallocator found."
1087
                                 " Please specify either an iallocator or a"
1088
                                 " node, or set a cluster-wide default"
1089
                                 " iallocator.")
1090

    
1091

    
1092
class LUPostInitCluster(LogicalUnit):
1093
  """Logical unit for running hooks after cluster initialization.
1094

1095
  """
1096
  HPATH = "cluster-init"
1097
  HTYPE = constants.HTYPE_CLUSTER
1098

    
1099
  def BuildHooksEnv(self):
1100
    """Build hooks env.
1101

1102
    """
1103
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1104
    mn = self.cfg.GetMasterNode()
1105
    return env, [], [mn]
1106

    
1107
  def Exec(self, feedback_fn):
1108
    """Nothing to do.
1109

1110
    """
1111
    return True
1112

    
1113

    
1114
class LUDestroyCluster(LogicalUnit):
1115
  """Logical unit for destroying the cluster.
1116

1117
  """
1118
  HPATH = "cluster-destroy"
1119
  HTYPE = constants.HTYPE_CLUSTER
1120

    
1121
  def BuildHooksEnv(self):
1122
    """Build hooks env.
1123

1124
    """
1125
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1126
    return env, [], []
1127

    
1128
  def CheckPrereq(self):
1129
    """Check prerequisites.
1130

1131
    This checks whether the cluster is empty.
1132

1133
    Any errors are signaled by raising errors.OpPrereqError.
1134

1135
    """
1136
    master = self.cfg.GetMasterNode()
1137

    
1138
    nodelist = self.cfg.GetNodeList()
1139
    if len(nodelist) != 1 or nodelist[0] != master:
1140
      raise errors.OpPrereqError("There are still %d node(s) in"
1141
                                 " this cluster." % (len(nodelist) - 1),
1142
                                 errors.ECODE_INVAL)
1143
    instancelist = self.cfg.GetInstanceList()
1144
    if instancelist:
1145
      raise errors.OpPrereqError("There are still %d instance(s) in"
1146
                                 " this cluster." % len(instancelist),
1147
                                 errors.ECODE_INVAL)
1148

    
1149
  def Exec(self, feedback_fn):
1150
    """Destroys the cluster.
1151

1152
    """
1153
    master = self.cfg.GetMasterNode()
1154

    
1155
    # Run post hooks on master node before it's removed
1156
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1157
    try:
1158
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1159
    except:
1160
      # pylint: disable-msg=W0702
1161
      self.LogWarning("Errors occurred running hooks on %s" % master)
1162

    
1163
    result = self.rpc.call_node_stop_master(master, False)
1164
    result.Raise("Could not disable the master role")
1165

    
1166
    return master
1167

    
1168

    
1169
def _VerifyCertificate(filename):
1170
  """Verifies a certificate for LUVerifyCluster.
1171

1172
  @type filename: string
1173
  @param filename: Path to PEM file
1174

1175
  """
1176
  try:
1177
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178
                                           utils.ReadFile(filename))
1179
  except Exception, err: # pylint: disable-msg=W0703
1180
    return (LUVerifyCluster.ETYPE_ERROR,
1181
            "Failed to load X509 certificate %s: %s" % (filename, err))
1182

    
1183
  (errcode, msg) = \
1184
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185
                                constants.SSL_CERT_EXPIRATION_ERROR)
1186

    
1187
  if msg:
1188
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1189
  else:
1190
    fnamemsg = None
1191

    
1192
  if errcode is None:
1193
    return (None, fnamemsg)
1194
  elif errcode == utils.CERT_WARNING:
1195
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1196
  elif errcode == utils.CERT_ERROR:
1197
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1198

    
1199
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200

    
1201

    
1202
class LUVerifyCluster(LogicalUnit):
1203
  """Verifies the cluster status.
1204

1205
  """
1206
  HPATH = "cluster-verify"
1207
  HTYPE = constants.HTYPE_CLUSTER
1208
  REQ_BGL = False
1209

    
1210
  TCLUSTER = "cluster"
1211
  TNODE = "node"
1212
  TINSTANCE = "instance"
1213

    
1214
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1215
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1216
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1217
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1218
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1219
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1220
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1221
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1222
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1223
  ENODEDRBD = (TNODE, "ENODEDRBD")
1224
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1225
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1226
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1227
  ENODEHV = (TNODE, "ENODEHV")
1228
  ENODELVM = (TNODE, "ENODELVM")
1229
  ENODEN1 = (TNODE, "ENODEN1")
1230
  ENODENET = (TNODE, "ENODENET")
1231
  ENODEOS = (TNODE, "ENODEOS")
1232
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1233
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1234
  ENODERPC = (TNODE, "ENODERPC")
1235
  ENODESSH = (TNODE, "ENODESSH")
1236
  ENODEVERSION = (TNODE, "ENODEVERSION")
1237
  ENODESETUP = (TNODE, "ENODESETUP")
1238
  ENODETIME = (TNODE, "ENODETIME")
1239
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1240

    
1241
  ETYPE_FIELD = "code"
1242
  ETYPE_ERROR = "ERROR"
1243
  ETYPE_WARNING = "WARNING"
1244

    
1245
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1246

    
1247
  class NodeImage(object):
1248
    """A class representing the logical and physical status of a node.
1249

1250
    @type name: string
1251
    @ivar name: the node name to which this object refers
1252
    @ivar volumes: a structure as returned from
1253
        L{ganeti.backend.GetVolumeList} (runtime)
1254
    @ivar instances: a list of running instances (runtime)
1255
    @ivar pinst: list of configured primary instances (config)
1256
    @ivar sinst: list of configured secondary instances (config)
1257
    @ivar sbp: diction of {secondary-node: list of instances} of all peers
1258
        of this node (config)
1259
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1260
    @ivar dfree: free disk, as reported by the node (runtime)
1261
    @ivar offline: the offline status (config)
1262
    @type rpc_fail: boolean
1263
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264
        not whether the individual keys were correct) (runtime)
1265
    @type lvm_fail: boolean
1266
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267
    @type hyp_fail: boolean
1268
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1269
    @type ghost: boolean
1270
    @ivar ghost: whether this is a known node or not (config)
1271
    @type os_fail: boolean
1272
    @ivar os_fail: whether the RPC call didn't return valid OS data
1273
    @type oslist: list
1274
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275
    @type vm_capable: boolean
1276
    @ivar vm_capable: whether the node can host instances
1277

1278
    """
1279
    def __init__(self, offline=False, name=None, vm_capable=True):
1280
      self.name = name
1281
      self.volumes = {}
1282
      self.instances = []
1283
      self.pinst = []
1284
      self.sinst = []
1285
      self.sbp = {}
1286
      self.mfree = 0
1287
      self.dfree = 0
1288
      self.offline = offline
1289
      self.vm_capable = vm_capable
1290
      self.rpc_fail = False
1291
      self.lvm_fail = False
1292
      self.hyp_fail = False
1293
      self.ghost = False
1294
      self.os_fail = False
1295
      self.oslist = {}
1296

    
1297
  def ExpandNames(self):
1298
    self.needed_locks = {
1299
      locking.LEVEL_NODE: locking.ALL_SET,
1300
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1301
    }
1302
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303

    
1304
  def _Error(self, ecode, item, msg, *args, **kwargs):
1305
    """Format an error message.
1306

1307
    Based on the opcode's error_codes parameter, either format a
1308
    parseable error code, or a simpler error string.
1309

1310
    This must be called only from Exec and functions called from Exec.
1311

1312
    """
1313
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314
    itype, etxt = ecode
1315
    # first complete the msg
1316
    if args:
1317
      msg = msg % args
1318
    # then format the whole message
1319
    if self.op.error_codes:
1320
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321
    else:
1322
      if item:
1323
        item = " " + item
1324
      else:
1325
        item = ""
1326
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327
    # and finally report it via the feedback_fn
1328
    self._feedback_fn("  - %s" % msg)
1329

    
1330
  def _ErrorIf(self, cond, *args, **kwargs):
1331
    """Log an error message if the passed condition is True.
1332

1333
    """
1334
    cond = bool(cond) or self.op.debug_simulate_errors
1335
    if cond:
1336
      self._Error(*args, **kwargs)
1337
    # do not mark the operation as failed for WARN cases only
1338
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339
      self.bad = self.bad or cond
1340

    
1341
  def _VerifyNode(self, ninfo, nresult):
1342
    """Perform some basic validation on data returned from a node.
1343

1344
      - check the result data structure is well formed and has all the
1345
        mandatory fields
1346
      - check ganeti version
1347

1348
    @type ninfo: L{objects.Node}
1349
    @param ninfo: the node to check
1350
    @param nresult: the results from the node
1351
    @rtype: boolean
1352
    @return: whether overall this call was successful (and we can expect
1353
         reasonable values in the respose)
1354

1355
    """
1356
    node = ninfo.name
1357
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358

    
1359
    # main result, nresult should be a non-empty dict
1360
    test = not nresult or not isinstance(nresult, dict)
1361
    _ErrorIf(test, self.ENODERPC, node,
1362
                  "unable to verify node: no data returned")
1363
    if test:
1364
      return False
1365

    
1366
    # compares ganeti version
1367
    local_version = constants.PROTOCOL_VERSION
1368
    remote_version = nresult.get("version", None)
1369
    test = not (remote_version and
1370
                isinstance(remote_version, (list, tuple)) and
1371
                len(remote_version) == 2)
1372
    _ErrorIf(test, self.ENODERPC, node,
1373
             "connection to node returned invalid data")
1374
    if test:
1375
      return False
1376

    
1377
    test = local_version != remote_version[0]
1378
    _ErrorIf(test, self.ENODEVERSION, node,
1379
             "incompatible protocol versions: master %s,"
1380
             " node %s", local_version, remote_version[0])
1381
    if test:
1382
      return False
1383

    
1384
    # node seems compatible, we can actually try to look into its results
1385

    
1386
    # full package version
1387
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388
                  self.ENODEVERSION, node,
1389
                  "software version mismatch: master %s, node %s",
1390
                  constants.RELEASE_VERSION, remote_version[1],
1391
                  code=self.ETYPE_WARNING)
1392

    
1393
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1395
      for hv_name, hv_result in hyp_result.iteritems():
1396
        test = hv_result is not None
1397
        _ErrorIf(test, self.ENODEHV, node,
1398
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399

    
1400
    test = nresult.get(constants.NV_NODESETUP,
1401
                           ["Missing NODESETUP results"])
1402
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1403
             "; ".join(test))
1404

    
1405
    return True
1406

    
1407
  def _VerifyNodeTime(self, ninfo, nresult,
1408
                      nvinfo_starttime, nvinfo_endtime):
1409
    """Check the node time.
1410

1411
    @type ninfo: L{objects.Node}
1412
    @param ninfo: the node to check
1413
    @param nresult: the remote results for the node
1414
    @param nvinfo_starttime: the start time of the RPC call
1415
    @param nvinfo_endtime: the end time of the RPC call
1416

1417
    """
1418
    node = ninfo.name
1419
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1420

    
1421
    ntime = nresult.get(constants.NV_TIME, None)
1422
    try:
1423
      ntime_merged = utils.MergeTime(ntime)
1424
    except (ValueError, TypeError):
1425
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1426
      return
1427

    
1428
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1429
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1430
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1431
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1432
    else:
1433
      ntime_diff = None
1434

    
1435
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1436
             "Node time diverges by at least %s from master node time",
1437
             ntime_diff)
1438

    
1439
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1440
    """Check the node time.
1441

1442
    @type ninfo: L{objects.Node}
1443
    @param ninfo: the node to check
1444
    @param nresult: the remote results for the node
1445
    @param vg_name: the configured VG name
1446

1447
    """
1448
    if vg_name is None:
1449
      return
1450

    
1451
    node = ninfo.name
1452
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1453

    
1454
    # checks vg existence and size > 20G
1455
    vglist = nresult.get(constants.NV_VGLIST, None)
1456
    test = not vglist
1457
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1458
    if not test:
1459
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1460
                                            constants.MIN_VG_SIZE)
1461
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1462

    
1463
    # check pv names
1464
    pvlist = nresult.get(constants.NV_PVLIST, None)
1465
    test = pvlist is None
1466
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1467
    if not test:
1468
      # check that ':' is not present in PV names, since it's a
1469
      # special character for lvcreate (denotes the range of PEs to
1470
      # use on the PV)
1471
      for _, pvname, owner_vg in pvlist:
1472
        test = ":" in pvname
1473
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1474
                 " '%s' of VG '%s'", pvname, owner_vg)
1475

    
1476
  def _VerifyNodeNetwork(self, ninfo, nresult):
1477
    """Check the node time.
1478

1479
    @type ninfo: L{objects.Node}
1480
    @param ninfo: the node to check
1481
    @param nresult: the remote results for the node
1482

1483
    """
1484
    node = ninfo.name
1485
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1486

    
1487
    test = constants.NV_NODELIST not in nresult
1488
    _ErrorIf(test, self.ENODESSH, node,
1489
             "node hasn't returned node ssh connectivity data")
1490
    if not test:
1491
      if nresult[constants.NV_NODELIST]:
1492
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1493
          _ErrorIf(True, self.ENODESSH, node,
1494
                   "ssh communication with node '%s': %s", a_node, a_msg)
1495

    
1496
    test = constants.NV_NODENETTEST not in nresult
1497
    _ErrorIf(test, self.ENODENET, node,
1498
             "node hasn't returned node tcp connectivity data")
1499
    if not test:
1500
      if nresult[constants.NV_NODENETTEST]:
1501
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1502
        for anode in nlist:
1503
          _ErrorIf(True, self.ENODENET, node,
1504
                   "tcp communication with node '%s': %s",
1505
                   anode, nresult[constants.NV_NODENETTEST][anode])
1506

    
1507
    test = constants.NV_MASTERIP not in nresult
1508
    _ErrorIf(test, self.ENODENET, node,
1509
             "node hasn't returned node master IP reachability data")
1510
    if not test:
1511
      if not nresult[constants.NV_MASTERIP]:
1512
        if node == self.master_node:
1513
          msg = "the master node cannot reach the master IP (not configured?)"
1514
        else:
1515
          msg = "cannot reach the master IP"
1516
        _ErrorIf(True, self.ENODENET, node, msg)
1517

    
1518
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1519
                      diskstatus):
1520
    """Verify an instance.
1521

1522
    This function checks to see if the required block devices are
1523
    available on the instance's node.
1524

1525
    """
1526
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1527
    node_current = instanceconfig.primary_node
1528

    
1529
    node_vol_should = {}
1530
    instanceconfig.MapLVsByNode(node_vol_should)
1531

    
1532
    for node in node_vol_should:
1533
      n_img = node_image[node]
1534
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1535
        # ignore missing volumes on offline or broken nodes
1536
        continue
1537
      for volume in node_vol_should[node]:
1538
        test = volume not in n_img.volumes
1539
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1540
                 "volume %s missing on node %s", volume, node)
1541

    
1542
    if instanceconfig.admin_up:
1543
      pri_img = node_image[node_current]
1544
      test = instance not in pri_img.instances and not pri_img.offline
1545
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1546
               "instance not running on its primary node %s",
1547
               node_current)
1548

    
1549
    for node, n_img in node_image.items():
1550
      if node != node_current:
1551
        test = instance in n_img.instances
1552
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1553
                 "instance should not run on node %s", node)
1554

    
1555
    diskdata = [(nname, success, status, idx)
1556
                for (nname, disks) in diskstatus.items()
1557
                for idx, (success, status) in enumerate(disks)]
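
    # Illustrative sketch of the flattening above, using hypothetical data:
    #   diskstatus = {"node1": [(True, st0), (False, "cannot find device")]}
    # becomes
    #   diskdata   = [("node1", True, st0, 0),
    #                 ("node1", False, "cannot find device", 1)]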
1558

    
1559
    for nname, success, bdev_status, idx in diskdata:
1560
      _ErrorIf(instanceconfig.admin_up and not success,
1561
               self.EINSTANCEFAULTYDISK, instance,
1562
               "couldn't retrieve status for disk/%s on %s: %s",
1563
               idx, nname, bdev_status)
1564
      _ErrorIf((instanceconfig.admin_up and success and
1565
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1566
               self.EINSTANCEFAULTYDISK, instance,
1567
               "disk/%s on %s is faulty", idx, nname)
1568

    
1569
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1570
    """Verify if there are any unknown volumes in the cluster.
1571

1572
    The .os, .swap and backup volumes are ignored. All other volumes are
1573
    reported as unknown.
1574

1575
    @type reserved: L{ganeti.utils.FieldSet}
1576
    @param reserved: a FieldSet of reserved volume names
1577

1578
    """
1579
    for node, n_img in node_image.items():
1580
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1581
        # skip non-healthy nodes
1582
        continue
1583
      for volume in n_img.volumes:
1584
        test = ((node not in node_vol_should or
1585
                volume not in node_vol_should[node]) and
1586
                not reserved.Matches(volume))
1587
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1588
                      "volume %s is unknown", volume)
1589

    
1590
  def _VerifyOrphanInstances(self, instancelist, node_image):
1591
    """Verify the list of running instances.
1592

1593
    This checks what instances are running but unknown to the cluster.
1594

1595
    """
1596
    for node, n_img in node_image.items():
1597
      for o_inst in n_img.instances:
1598
        test = o_inst not in instancelist
1599
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1600
                      "instance %s on node %s should not exist", o_inst, node)
1601

    
1602
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1603
    """Verify N+1 Memory Resilience.
1604

1605
    Check that if one single node dies we can still start all the
1606
    instances it was primary for.
1607

1608
    """
1609
    for node, n_img in node_image.items():
1610
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all the instances it is
      # expected to take over, should a single other node in the cluster fail.
1613
      # FIXME: not ready for failover to an arbitrary node
1614
      # FIXME: does not support file-backed instances
1615
      # WARNING: we currently take into account down instances as well
1616
      # as up ones, considering that even if they're down someone
1617
      # might want to start them even in the event of a node failure.
1618
      for prinode, instances in n_img.sbp.items():
1619
        needed_mem = 0
1620
        for instance in instances:
1621
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1622
          if bep[constants.BE_AUTO_BALANCE]:
1623
            needed_mem += bep[constants.BE_MEMORY]
1624
        test = n_img.mfree < needed_mem
1625
        self._ErrorIf(test, self.ENODEN1, node,
1626
                      "not enough memory to accomodate instance failovers"
1627
                      " should node %s fail", prinode)
1628

    
1629
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1630
                       master_files):
1631
    """Verifies and computes the node required file checksums.
1632

1633
    @type ninfo: L{objects.Node}
1634
    @param ninfo: the node to check
1635
    @param nresult: the remote results for the node
1636
    @param file_list: required list of files
1637
    @param local_cksum: dictionary of local files and their checksums
1638
    @param master_files: list of files that only masters should have
1639

1640
    """
1641
    node = ninfo.name
1642
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1643

    
1644
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1645
    test = not isinstance(remote_cksum, dict)
1646
    _ErrorIf(test, self.ENODEFILECHECK, node,
1647
             "node hasn't returned file checksum data")
1648
    if test:
1649
      return
1650

    
1651
    for file_name in file_list:
1652
      node_is_mc = ninfo.master_candidate
1653
      must_have = (file_name not in master_files) or node_is_mc
1654
      # missing
1655
      test1 = file_name not in remote_cksum
1656
      # invalid checksum
1657
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1658
      # existing and good
1659
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1660
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1661
               "file '%s' missing", file_name)
1662
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1663
               "file '%s' has wrong checksum", file_name)
1664
      # not candidate and this is not a must-have file
1665
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1666
               "file '%s' should not exist on non master"
1667
               " candidates (and the file is outdated)", file_name)
1668
      # all good, except non-master/non-must have combination
1669
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1670
               "file '%s' should not exist"
1671
               " on non master candidates", file_name)
1672

    
1673
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1674
                      drbd_map):
1675
    """Verifies and the node DRBD status.
1676

1677
    @type ninfo: L{objects.Node}
1678
    @param ninfo: the node to check
1679
    @param nresult: the remote results for the node
1680
    @param instanceinfo: the dict of instances
1681
    @param drbd_helper: the configured DRBD usermode helper
1682
    @param drbd_map: the DRBD map as returned by
1683
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1684

1685
    """
1686
    node = ninfo.name
1687
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1688

    
1689
    if drbd_helper:
1690
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1691
      test = (helper_result is None)
1692
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1693
               "no drbd usermode helper returned")
1694
      if helper_result:
1695
        status, payload = helper_result
1696
        test = not status
1697
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1698
                 "drbd usermode helper check unsuccessful: %s", payload)
1699
        test = status and (payload != drbd_helper)
1700
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1701
                 "wrong drbd usermode helper: %s", payload)
1702

    
1703
    # compute the DRBD minors
1704
    node_drbd = {}
1705
    for minor, instance in drbd_map[node].items():
1706
      test = instance not in instanceinfo
1707
      _ErrorIf(test, self.ECLUSTERCFG, None,
1708
               "ghost instance '%s' in temporary DRBD map", instance)
1709
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
1712
      if test:
1713
        node_drbd[minor] = (instance, False)
1714
      else:
1715
        instance = instanceinfo[instance]
1716
        node_drbd[minor] = (instance.name, instance.admin_up)
1717

    
1718
    # and now check them
1719
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1720
    test = not isinstance(used_minors, (tuple, list))
1721
    _ErrorIf(test, self.ENODEDRBD, node,
1722
             "cannot parse drbd status file: %s", str(used_minors))
1723
    if test:
1724
      # we cannot check drbd status
1725
      return
1726

    
1727
    for minor, (iname, must_exist) in node_drbd.items():
1728
      test = minor not in used_minors and must_exist
1729
      _ErrorIf(test, self.ENODEDRBD, node,
1730
               "drbd minor %d of instance %s is not active", minor, iname)
1731
    for minor in used_minors:
1732
      test = minor not in node_drbd
1733
      _ErrorIf(test, self.ENODEDRBD, node,
1734
               "unallocated drbd minor %d is in use", minor)
1735

    
1736
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1737
    """Builds the node OS structures.
1738

1739
    @type ninfo: L{objects.Node}
1740
    @param ninfo: the node to check
1741
    @param nresult: the remote results for the node
1742
    @param nimg: the node image object
1743

1744
    """
1745
    node = ninfo.name
1746
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1747

    
1748
    remote_os = nresult.get(constants.NV_OSLIST, None)
1749
    test = (not isinstance(remote_os, list) or
1750
            not compat.all(isinstance(v, list) and len(v) == 7
1751
                           for v in remote_os))
1752

    
1753
    _ErrorIf(test, self.ENODEOS, node,
1754
             "node hasn't returned valid OS data")
1755

    
1756
    nimg.os_fail = test
1757

    
1758
    if test:
1759
      return
1760

    
1761
    os_dict = {}
1762

    
1763
    for (name, os_path, status, diagnose,
1764
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1765

    
1766
      if name not in os_dict:
1767
        os_dict[name] = []
1768

    
1769
      # parameters is a list of lists instead of list of tuples due to
1770
      # JSON lacking a real tuple type, fix it:
1771
      parameters = [tuple(v) for v in parameters]
1772
      os_dict[name].append((os_path, status, diagnose,
1773
                            set(variants), set(parameters), set(api_ver)))
1774

    
1775
    nimg.oslist = os_dict
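
  # Illustrative shape of the OS data handled above (hypothetical values):
  #   a NV_OSLIST entry  ["debootstrap", "/srv/ganeti/os/debootstrap", True,
  #                       "", ["default"], [["dhcp", "Use DHCP"]], [20]]
  #   ends up in nimg.oslist as
  #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
  #                     set(["default"]), set([("dhcp", "Use DHCP")]),
  #                     set([20]))]}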
1776

    
1777
  def _VerifyNodeOS(self, ninfo, nimg, base):
1778
    """Verifies the node OS list.
1779

1780
    @type ninfo: L{objects.Node}
1781
    @param ninfo: the node to check
1782
    @param nimg: the node image object
1783
    @param base: the 'template' node we match against (e.g. from the master)
1784

1785
    """
1786
    node = ninfo.name
1787
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1788

    
1789
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1790

    
1791
    for os_name, os_data in nimg.oslist.items():
1792
      assert os_data, "Empty OS status for OS %s?!" % os_name
1793
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1794
      _ErrorIf(not f_status, self.ENODEOS, node,
1795
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1796
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1797
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1798
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1799
      # this will also be caught in the backend
1800
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1801
               and not f_var, self.ENODEOS, node,
1802
               "OS %s with API at least %d does not declare any variant",
1803
               os_name, constants.OS_API_V15)
1804
      # comparisons with the 'base' image
1805
      test = os_name not in base.oslist
1806
      _ErrorIf(test, self.ENODEOS, node,
1807
               "Extra OS %s not present on reference node (%s)",
1808
               os_name, base.name)
1809
      if test:
1810
        continue
1811
      assert base.oslist[os_name], "Base node has empty OS status?"
1812
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1813
      if not b_status:
1814
        # base OS is invalid, skipping
1815
        continue
1816
      for kind, a, b in [("API version", f_api, b_api),
1817
                         ("variants list", f_var, b_var),
1818
                         ("parameters", f_param, b_param)]:
1819
        _ErrorIf(a != b, self.ENODEOS, node,
1820
                 "OS %s %s differs from reference node %s: %s vs. %s",
1821
                 kind, os_name, base.name,
1822
                 utils.CommaJoin(a), utils.CommaJoin(b))
1823

    
1824
    # check any missing OSes
1825
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1826
    _ErrorIf(missing, self.ENODEOS, node,
1827
             "OSes present on reference node %s but missing on this node: %s",
1828
             base.name, utils.CommaJoin(missing))
1829

    
1830
  def _VerifyOob(self, ninfo, nresult):
1831
    """Verifies out of band functionality of a node.
1832

1833
    @type ninfo: L{objects.Node}
1834
    @param ninfo: the node to check
1835
    @param nresult: the remote results for the node
1836

1837
    """
1838
    node = ninfo.name
1839
    # We just have to verify the paths on master and/or master candidates
1840
    # as the oob helper is invoked on the master
1841
    if ((ninfo.master_candidate or ninfo.master_capable) and
1842
        constants.NV_OOB_PATHS in nresult):
1843
      for path_result in nresult[constants.NV_OOB_PATHS]:
1844
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1845

    
1846
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1847
    """Verifies and updates the node volume data.
1848

1849
    This function will update a L{NodeImage}'s internal structures
1850
    with data from the remote call.
1851

1852
    @type ninfo: L{objects.Node}
1853
    @param ninfo: the node to check
1854
    @param nresult: the remote results for the node
1855
    @param nimg: the node image object
1856
    @param vg_name: the configured VG name
1857

1858
    """
1859
    node = ninfo.name
1860
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1861

    
1862
    nimg.lvm_fail = True
1863
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1864
    if vg_name is None:
1865
      pass
1866
    elif isinstance(lvdata, basestring):
1867
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1868
               utils.SafeEncode(lvdata))
1869
    elif not isinstance(lvdata, dict):
1870
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1871
    else:
1872
      nimg.volumes = lvdata
1873
      nimg.lvm_fail = False
1874

    
1875
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1876
    """Verifies and updates the node instance list.
1877

1878
    If the listing was successful, then updates this node's instance
1879
    list. Otherwise, it marks the RPC call as failed for the instance
1880
    list key.
1881

1882
    @type ninfo: L{objects.Node}
1883
    @param ninfo: the node to check
1884
    @param nresult: the remote results for the node
1885
    @param nimg: the node image object
1886

1887
    """
1888
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1889
    test = not isinstance(idata, list)
1890
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1891
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1892
    if test:
1893
      nimg.hyp_fail = True
1894
    else:
1895
      nimg.instances = idata
1896

    
1897
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1898
    """Verifies and computes a node information map
1899

1900
    @type ninfo: L{objects.Node}
1901
    @param ninfo: the node to check
1902
    @param nresult: the remote results for the node
1903
    @param nimg: the node image object
1904
    @param vg_name: the configured VG name
1905

1906
    """
1907
    node = ninfo.name
1908
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1909

    
1910
    # try to read free memory (from the hypervisor)
1911
    hv_info = nresult.get(constants.NV_HVINFO, None)
1912
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1913
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1914
    if not test:
1915
      try:
1916
        nimg.mfree = int(hv_info["memory_free"])
1917
      except (ValueError, TypeError):
1918
        _ErrorIf(True, self.ENODERPC, node,
1919
                 "node returned invalid nodeinfo, check hypervisor")
1920

    
1921
    # FIXME: devise a free space model for file based instances as well
1922
    if vg_name is not None:
1923
      test = (constants.NV_VGLIST not in nresult or
1924
              vg_name not in nresult[constants.NV_VGLIST])
1925
      _ErrorIf(test, self.ENODELVM, node,
1926
               "node didn't return data for the volume group '%s'"
1927
               " - it is either missing or broken", vg_name)
1928
      if not test:
1929
        try:
1930
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1931
        except (ValueError, TypeError):
1932
          _ErrorIf(True, self.ENODERPC, node,
1933
                   "node returned invalid LVM info, check LVM status")
1934

    
1935
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1936
    """Gets per-disk status information for all instances.
1937

1938
    @type nodelist: list of strings
1939
    @param nodelist: Node names
1940
    @type node_image: dict of (name, L{objects.Node})
1941
    @param node_image: Node objects
1942
    @type instanceinfo: dict of (name, L{objects.Instance})
1943
    @param instanceinfo: Instance objects
1944
    @rtype: {instance: {node: [(success, payload)]}}
1945
    @return: a dictionary of per-instance dictionaries with nodes as
1946
        keys and disk information as values; the disk information is a
1947
        list of tuples (success, payload)
1948

1949
    """
1950
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1951

    
1952
    node_disks = {}
1953
    node_disks_devonly = {}
1954
    diskless_instances = set()
1955
    diskless = constants.DT_DISKLESS
1956

    
1957
    for nname in nodelist:
1958
      node_instances = list(itertools.chain(node_image[nname].pinst,
1959
                                            node_image[nname].sinst))
1960
      diskless_instances.update(inst for inst in node_instances
1961
                                if instanceinfo[inst].disk_template == diskless)
1962
      disks = [(inst, disk)
1963
               for inst in node_instances
1964
               for disk in instanceinfo[inst].disks]
1965

    
1966
      if not disks:
1967
        # No need to collect data
1968
        continue
1969

    
1970
      node_disks[nname] = disks
1971

    
1972
      # Creating copies as SetDiskID below will modify the objects and that can
1973
      # lead to incorrect data returned from nodes
1974
      devonly = [dev.Copy() for (_, dev) in disks]
1975

    
1976
      for dev in devonly:
1977
        self.cfg.SetDiskID(dev, nname)
1978

    
1979
      node_disks_devonly[nname] = devonly
1980

    
1981
    assert len(node_disks) == len(node_disks_devonly)
1982

    
1983
    # Collect data from all nodes with disks
1984
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1985
                                                          node_disks_devonly)
1986

    
1987
    assert len(result) == len(node_disks)
1988

    
1989
    instdisk = {}
1990

    
1991
    for (nname, nres) in result.items():
1992
      disks = node_disks[nname]
1993

    
1994
      if nres.offline:
1995
        # No data from this node
1996
        data = len(disks) * [(False, "node offline")]
1997
      else:
1998
        msg = nres.fail_msg
1999
        _ErrorIf(msg, self.ENODERPC, nname,
2000
                 "while getting disk information: %s", msg)
2001
        if msg:
2002
          # No data from this node
2003
          data = len(disks) * [(False, msg)]
2004
        else:
2005
          data = []
2006
          for idx, i in enumerate(nres.payload):
2007
            if isinstance(i, (tuple, list)) and len(i) == 2:
2008
              data.append(i)
2009
            else:
2010
              logging.warning("Invalid result from node %s, entry %d: %s",
2011
                              nname, idx, i)
2012
              data.append((False, "Invalid result from the remote node"))
2013

    
2014
      for ((inst, _), status) in zip(disks, data):
2015
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2016

    
2017
    # Add empty entries for diskless instances.
2018
    for inst in diskless_instances:
2019
      assert inst not in instdisk
2020
      instdisk[inst] = {}
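
    # Illustrative sketch of the structure built above (hypothetical names):
    #   instdisk = {
    #     "inst1.example.com": {"node1.example.com": [(True, bdev_status0),
    #                                                 (True, bdev_status1)]},
    #     "diskless1.example.com": {},
    #   }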
2021

    
2022
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2023
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2024
                      compat.all(isinstance(s, (tuple, list)) and
2025
                                 len(s) == 2 for s in statuses)
2026
                      for inst, nnames in instdisk.items()
2027
                      for nname, statuses in nnames.items())
2028
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2029

    
2030
    return instdisk
2031

    
2032
  def BuildHooksEnv(self):
2033
    """Build hooks env.
2034

2035
    Cluster-Verify hooks just ran in the post phase and their failure makes
2036
    the output be logged in the verify output and the verification to fail.
2037

2038
    """
2039
    all_nodes = self.cfg.GetNodeList()
2040
    env = {
2041
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2042
      }
2043
    for node in self.cfg.GetAllNodesInfo().values():
2044
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2045

    
2046
    return env, [], all_nodes
2047

    
2048
  def Exec(self, feedback_fn):
2049
    """Verify integrity of cluster, performing various test on nodes.
2050

2051
    """
2052
    # This method has too many local variables. pylint: disable-msg=R0914
2053
    self.bad = False
2054
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2055
    verbose = self.op.verbose
2056
    self._feedback_fn = feedback_fn
2057
    feedback_fn("* Verifying global settings")
2058
    for msg in self.cfg.VerifyConfig():
2059
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2060

    
2061
    # Check the cluster certificates
2062
    for cert_filename in constants.ALL_CERT_FILES:
2063
      (errcode, msg) = _VerifyCertificate(cert_filename)
2064
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2065

    
2066
    vg_name = self.cfg.GetVGName()
2067
    drbd_helper = self.cfg.GetDRBDHelper()
2068
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2069
    cluster = self.cfg.GetClusterInfo()
2070
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2071
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2072
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2073
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2074
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2075
                        for iname in instancelist)
2076
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2077
    i_non_redundant = [] # Non redundant instances
2078
    i_non_a_balanced = [] # Non auto-balanced instances
2079
    n_offline = 0 # Count of offline nodes
2080
    n_drained = 0 # Count of nodes being drained
2081
    node_vol_should = {}
2082

    
2083
    # FIXME: verify OS list
2084
    # do local checksums
2085
    master_files = [constants.CLUSTER_CONF_FILE]
2086
    master_node = self.master_node = self.cfg.GetMasterNode()
2087
    master_ip = self.cfg.GetMasterIP()
2088

    
2089
    file_names = ssconf.SimpleStore().GetFileList()
2090
    file_names.extend(constants.ALL_CERT_FILES)
2091
    file_names.extend(master_files)
2092
    if cluster.modify_etc_hosts:
2093
      file_names.append(constants.ETC_HOSTS)
2094

    
2095
    local_checksums = utils.FingerprintFiles(file_names)
2096

    
2097
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2098
    node_verify_param = {
2099
      constants.NV_FILELIST: file_names,
2100
      constants.NV_NODELIST: [node.name for node in nodeinfo
2101
                              if not node.offline],
2102
      constants.NV_HYPERVISOR: hypervisors,
2103
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2104
                                  node.secondary_ip) for node in nodeinfo
2105
                                 if not node.offline],
2106
      constants.NV_INSTANCELIST: hypervisors,
2107
      constants.NV_VERSION: None,
2108
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2109
      constants.NV_NODESETUP: None,
2110
      constants.NV_TIME: None,
2111
      constants.NV_MASTERIP: (master_node, master_ip),
2112
      constants.NV_OSLIST: None,
2113
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2114
      }
2115

    
2116
    if vg_name is not None:
2117
      node_verify_param[constants.NV_VGLIST] = None
2118
      node_verify_param[constants.NV_LVLIST] = vg_name
2119
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2120
      node_verify_param[constants.NV_DRBDLIST] = None
2121

    
2122
    if drbd_helper:
2123
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2124

    
2125
    # Build our expected cluster state
2126
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2127
                                                 name=node.name,
2128
                                                 vm_capable=node.vm_capable))
2129
                      for node in nodeinfo)
2130

    
2131
    # Gather OOB paths
2132
    oob_paths = []
2133
    for node in nodeinfo:
2134
      path = _SupportsOob(self.cfg, node)
2135
      if path and path not in oob_paths:
2136
        oob_paths.append(path)
2137

    
2138
    if oob_paths:
2139
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2140

    
2141
    for instance in instancelist:
2142
      inst_config = instanceinfo[instance]
2143

    
2144
      for nname in inst_config.all_nodes:
2145
        if nname not in node_image:
2146
          # ghost node
2147
          gnode = self.NodeImage(name=nname)
2148
          gnode.ghost = True
2149
          node_image[nname] = gnode
2150

    
2151
      inst_config.MapLVsByNode(node_vol_should)
2152

    
2153
      pnode = inst_config.primary_node
2154
      node_image[pnode].pinst.append(instance)
2155

    
2156
      for snode in inst_config.secondary_nodes:
2157
        nimg = node_image[snode]
2158
        nimg.sinst.append(instance)
2159
        if pnode not in nimg.sbp:
2160
          nimg.sbp[pnode] = []
2161
        nimg.sbp[pnode].append(instance)
2162

    
2163
    # At this point, we have the in-memory data structures complete,
2164
    # except for the runtime information, which we'll gather next
2165

    
2166
    # Due to the way our RPC system works, exact response times cannot be
2167
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2168
    # time before and after executing the request, we can at least have a time
2169
    # window.
2170
    nvinfo_starttime = time.time()
2171
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2172
                                           self.cfg.GetClusterName())
2173
    nvinfo_endtime = time.time()
2174

    
2175
    all_drbd_map = self.cfg.ComputeDRBDMap()
2176

    
2177
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2178
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2179

    
2180
    feedback_fn("* Verifying node status")
2181

    
2182
    refos_img = None
2183

    
2184
    for node_i in nodeinfo:
2185
      node = node_i.name
2186
      nimg = node_image[node]
2187

    
2188
      if node_i.offline:
2189
        if verbose:
2190
          feedback_fn("* Skipping offline node %s" % (node,))
2191
        n_offline += 1
2192
        continue
2193

    
2194
      if node == master_node:
2195
        ntype = "master"
2196
      elif node_i.master_candidate:
2197
        ntype = "master candidate"
2198
      elif node_i.drained:
2199
        ntype = "drained"
2200
        n_drained += 1
2201
      else:
2202
        ntype = "regular"
2203
      if verbose:
2204
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2205

    
2206
      msg = all_nvinfo[node].fail_msg
2207
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2208
      if msg:
2209
        nimg.rpc_fail = True
2210
        continue
2211

    
2212
      nresult = all_nvinfo[node].payload
2213

    
2214
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2215
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2216
      self._VerifyNodeNetwork(node_i, nresult)
2217
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2218
                            master_files)
2219

    
2220
      self._VerifyOob(node_i, nresult)
2221

    
2222
      if nimg.vm_capable:
2223
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2224
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2225
                             all_drbd_map)
2226

    
2227
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2228
        self._UpdateNodeInstances(node_i, nresult, nimg)
2229
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2230
        self._UpdateNodeOS(node_i, nresult, nimg)
2231
        if not nimg.os_fail:
2232
          if refos_img is None:
2233
            refos_img = nimg
2234
          self._VerifyNodeOS(node_i, nimg, refos_img)
2235

    
2236
    feedback_fn("* Verifying instance status")
2237
    for instance in instancelist:
2238
      if verbose:
2239
        feedback_fn("* Verifying instance %s" % instance)
2240
      inst_config = instanceinfo[instance]
2241
      self._VerifyInstance(instance, inst_config, node_image,
2242
                           instdisk[instance])
2243
      inst_nodes_offline = []
2244

    
2245
      pnode = inst_config.primary_node
2246
      pnode_img = node_image[pnode]
2247
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2248
               self.ENODERPC, pnode, "instance %s, connection to"
2249
               " primary node failed", instance)
2250

    
2251
      if pnode_img.offline:
2252
        inst_nodes_offline.append(pnode)
2253

    
2254
      # If the instance is non-redundant we cannot survive losing its primary
2255
      # node, so we are not N+1 compliant. On the other hand we have no disk
2256
      # templates with more than one secondary so that situation is not well
2257
      # supported either.
2258
      # FIXME: does not support file-backed instances
2259
      if not inst_config.secondary_nodes:
2260
        i_non_redundant.append(instance)
2261

    
2262
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2263
               instance, "instance has multiple secondary nodes: %s",
2264
               utils.CommaJoin(inst_config.secondary_nodes),
2265
               code=self.ETYPE_WARNING)
2266

    
2267
      if inst_config.disk_template in constants.DTS_NET_MIRROR:
2268
        pnode = inst_config.primary_node
2269
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2270
        instance_groups = {}
2271

    
2272
        for node in instance_nodes:
2273
          instance_groups.setdefault(nodeinfo_byname[node].group,
2274
                                     []).append(node)
2275

    
2276
        pretty_list = [
2277
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2278
          # Sort so that we always list the primary node first.
2279
          for group, nodes in sorted(instance_groups.items(),
2280
                                     key=lambda (_, nodes): pnode in nodes,
2281
                                     reverse=True)]
2282

    
2283
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2284
                      instance, "instance has primary and secondary nodes in"
2285
                      " different groups: %s", utils.CommaJoin(pretty_list),
2286
                      code=self.ETYPE_WARNING)
2287

    
2288
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2289
        i_non_a_balanced.append(instance)
2290

    
2291
      for snode in inst_config.secondary_nodes:
2292
        s_img = node_image[snode]
2293
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2294
                 "instance %s, connection to secondary node failed", instance)
2295

    
2296
        if s_img.offline:
2297
          inst_nodes_offline.append(snode)
2298

    
2299
      # warn that the instance lives on offline nodes
2300
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2301
               "instance lives on offline node(s) %s",
2302
               utils.CommaJoin(inst_nodes_offline))
2303
      # ... or ghost/non-vm_capable nodes
2304
      for node in inst_config.all_nodes:
2305
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2306
                 "instance lives on ghost node %s", node)
2307
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2308
                 instance, "instance lives on non-vm_capable node %s", node)
2309

    
2310
    feedback_fn("* Verifying orphan volumes")
2311
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2312
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2313

    
2314
    feedback_fn("* Verifying orphan instances")
2315
    self._VerifyOrphanInstances(instancelist, node_image)
2316

    
2317
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2318
      feedback_fn("* Verifying N+1 Memory redundancy")
2319
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2320

    
2321
    feedback_fn("* Other Notes")
2322
    if i_non_redundant:
2323
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2324
                  % len(i_non_redundant))
2325

    
2326
    if i_non_a_balanced:
2327
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2328
                  % len(i_non_a_balanced))
2329

    
2330
    if n_offline:
2331
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2332

    
2333
    if n_drained:
2334
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2335

    
2336
    return not self.bad
2337

    
2338
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2339
    """Analyze the post-hooks' result
2340

2341
    This method analyses the hook result, handles it, and sends some
2342
    nicely-formatted feedback back to the user.
2343

2344
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2345
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2346
    @param hooks_results: the results of the multi-node hooks rpc call
2347
    @param feedback_fn: function used send feedback back to the caller
2348
    @param lu_result: previous Exec result
2349
    @return: the new Exec result, based on the previous result
2350
        and hook results
2351

2352
    """
2353
    # We only really run POST phase hooks, and are only interested in
2354
    # their results
2355
    if phase == constants.HOOKS_PHASE_POST:
2356
      # Used to change hooks' output to proper indentation
2357
      feedback_fn("* Hooks Results")
2358
      assert hooks_results, "invalid result from hooks"
2359

    
2360
      for node_name in hooks_results:
2361
        res = hooks_results[node_name]
2362
        msg = res.fail_msg
2363
        test = msg and not res.offline
2364
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2365
                      "Communication failure in hooks execution: %s", msg)
2366
        if res.offline or msg:
2367
          # No need to investigate payload if node is offline or gave an error.
2368
          # override manually lu_result here as _ErrorIf only
2369
          # overrides self.bad
2370
          lu_result = 1
2371
          continue
2372
        for script, hkr, output in res.payload:
2373
          test = hkr == constants.HKR_FAIL
2374
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2375
                        "Script %s failed, output:", script)
2376
          if test:
2377
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2378
            feedback_fn("%s" % output)
2379
            lu_result = 0
2380

    
2381
      return lu_result
2382

    
2383

    
2384
class LUVerifyDisks(NoHooksLU):
2385
  """Verifies the cluster disks status.
2386

2387
  """
2388
  REQ_BGL = False
2389

    
2390
  def ExpandNames(self):
2391
    self.needed_locks = {
2392
      locking.LEVEL_NODE: locking.ALL_SET,
2393
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2394
    }
2395
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2396

    
2397
  def Exec(self, feedback_fn):
2398
    """Verify integrity of cluster disks.
2399

2400
    @rtype: tuple of three items
2401
    @return: a tuple of (dict of node-to-node_error, list of instances
2402
        which need activate-disks, dict of instance: (node, volume) for
2403
        missing volumes)
2404

2405
    """
2406
    result = res_nodes, res_instances, res_missing = {}, [], {}
2407

    
2408
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2409
    instances = [self.cfg.GetInstanceInfo(name)
2410
                 for name in self.cfg.GetInstanceList()]
2411

    
2412
    nv_dict = {}
2413
    for inst in instances:
2414
      inst_lvs = {}
2415
      if (not inst.admin_up or
2416
          inst.disk_template not in constants.DTS_NET_MIRROR):
2417
        continue
2418
      inst.MapLVsByNode(inst_lvs)
2419
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2420
      for node, vol_list in inst_lvs.iteritems():
2421
        for vol in vol_list:
2422
          nv_dict[(node, vol)] = inst
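
    # Illustrative example of the transformation noted above, assuming a
    # hypothetical instance object inst1 whose LVs live on "node1":
    #   inst_lvs = {"node1": ["xenvg/disk0_data", "xenvg/disk0_meta"]}
    # becomes
    #   nv_dict  = {("node1", "xenvg/disk0_data"): inst1,
    #               ("node1", "xenvg/disk0_meta"): inst1}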
2423

    
2424
    if not nv_dict:
2425
      return result
2426

    
2427
    vg_names = self.rpc.call_vg_list(nodes)
2428
    vg_names.Raise("Cannot get list of VGs")
2429

    
2430
    for node in nodes:
2431
      # node_volume
2432
      node_res = self.rpc.call_lv_list([node],
2433
                                       vg_names[node].payload.keys())[node]
2434
      if node_res.offline:
2435
        continue
2436
      msg = node_res.fail_msg
2437
      if msg:
2438
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2439
        res_nodes[node] = msg
2440
        continue
2441

    
2442
      lvs = node_res.payload
2443
      for lv_name, (_, _, lv_online) in lvs.items():
2444
        inst = nv_dict.pop((node, lv_name), None)
2445
        if (not lv_online and inst is not None
2446
            and inst.name not in res_instances):
2447
          res_instances.append(inst.name)
2448

    
2449
    # any leftover items in nv_dict are missing LVs, let's arrange the
2450
    # data better
2451
    for key, inst in nv_dict.iteritems():
2452
      if inst.name not in res_missing:
2453
        res_missing[inst.name] = []
2454
      res_missing[inst.name].append(key)
2455

    
2456
    return result
2457

    
2458

    
2459
class LURepairDiskSizes(NoHooksLU):
2460
  """Verifies the cluster disks sizes.
2461

2462
  """
2463
  REQ_BGL = False
2464

    
2465
  def ExpandNames(self):
2466
    if self.op.instances:
2467
      self.wanted_names = []
2468
      for name in self.op.instances:
2469
        full_name = _ExpandInstanceName(self.cfg, name)
2470
        self.wanted_names.append(full_name)
2471
      self.needed_locks = {
2472
        locking.LEVEL_NODE: [],
2473
        locking.LEVEL_INSTANCE: self.wanted_names,
2474
        }
2475
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2476
    else:
2477
      self.wanted_names = None
2478
      self.needed_locks = {
2479
        locking.LEVEL_NODE: locking.ALL_SET,
2480
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2481
        }
2482
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2483

    
2484
  def DeclareLocks(self, level):
2485
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2486
      self._LockInstancesNodes(primary_only=True)
2487

    
2488
  def CheckPrereq(self):
2489
    """Check prerequisites.
2490

2491
    This only checks the optional instance list against the existing names.
2492

2493
    """
2494
    if self.wanted_names is None:
2495
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2496

    
2497
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2498
                             in self.wanted_names]
2499

    
2500
  def _EnsureChildSizes(self, disk):
2501
    """Ensure children of the disk have the needed disk size.
2502

2503
    This is valid mainly for DRBD8 and fixes an issue where the
2504
    children have smaller disk size.
2505

2506
    @param disk: an L{ganeti.objects.Disk} object
2507

2508
    """
2509
    if disk.dev_type == constants.LD_DRBD8:
2510
      assert disk.children, "Empty children for DRBD8?"
2511
      fchild = disk.children[0]
2512
      mismatch = fchild.size < disk.size
2513
      if mismatch:
2514
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2515
                     fchild.size, disk.size)
2516
        fchild.size = disk.size
2517

    
2518
      # and we recurse on this child only, not on the metadev
2519
      return self._EnsureChildSizes(fchild) or mismatch
2520
    else:
2521
      return False
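
  # Minimal sketch of what _EnsureChildSizes repairs (hypothetical sizes in
  # MiB): a DRBD8 disk of size 10240 whose data child reports 10236 gets the
  # child grown to 10240; the metadata child is deliberately left untouched.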
2522

    
2523
  def Exec(self, feedback_fn):
2524
    """Verify the size of cluster disks.
2525

2526
    """
2527
    # TODO: check child disks too
2528
    # TODO: check differences in size between primary/secondary nodes
2529
    per_node_disks = {}
2530
    for instance in self.wanted_instances:
2531
      pnode = instance.primary_node
2532
      if pnode not in per_node_disks:
2533
        per_node_disks[pnode] = []
2534
      for idx, disk in enumerate(instance.disks):
2535
        per_node_disks[pnode].append((instance, idx, disk))
2536

    
2537
    changed = []
2538
    for node, dskl in per_node_disks.items():
2539
      newl = [v[2].Copy() for v in dskl]
2540
      for dsk in newl:
2541
        self.cfg.SetDiskID(dsk, node)
2542
      result = self.rpc.call_blockdev_getsizes(node, newl)
2543
      if result.fail_msg:
2544
        self.LogWarning("Failure in blockdev_getsizes call to node"
2545
                        " %s, ignoring", node)
2546
        continue
2547
      if len(result.data) != len(dskl):
2548
        self.LogWarning("Invalid result from node %s, ignoring node results",
2549
                        node)
2550
        continue
2551
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2552
        if size is None:
2553
          self.LogWarning("Disk %d of instance %s did not return size"
2554
                          " information, ignoring", idx, instance.name)
2555
          continue
2556
        if not isinstance(size, (int, long)):
2557
          self.LogWarning("Disk %d of instance %s did not return valid"
2558
                          " size information, ignoring", idx, instance.name)
2559
          continue
2560
        size = size >> 20
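        # (the right shift by 20 above converts the byte count reported by
        # the node into MiB, the unit used for disk.size in the config)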
2561
        if size != disk.size:
2562
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2563
                       " correcting: recorded %d, actual %d", idx,
2564
                       instance.name, disk.size, size)
2565
          disk.size = size
2566
          self.cfg.Update(instance, feedback_fn)
2567
          changed.append((instance.name, idx, size))
2568
        if self._EnsureChildSizes(disk):
2569
          self.cfg.Update(instance, feedback_fn)
2570
          changed.append((instance.name, idx, disk.size))
2571
    return changed
2572

    
2573

    
2574
class LURenameCluster(LogicalUnit):
2575
  """Rename the cluster.
2576

2577
  """
2578
  HPATH = "cluster-rename"
2579
  HTYPE = constants.HTYPE_CLUSTER
2580

    
2581
  def BuildHooksEnv(self):
2582
    """Build hooks env.
2583

2584
    """
2585
    env = {
2586
      "OP_TARGET": self.cfg.GetClusterName(),
2587
      "NEW_NAME": self.op.name,
2588
      }
2589
    mn = self.cfg.GetMasterNode()
2590
    all_nodes = self.cfg.GetNodeList()
2591
    return env, [mn], all_nodes
2592

    
2593
  def CheckPrereq(self):
2594
    """Verify that the passed name is a valid one.
2595

2596
    """
2597
    hostname = netutils.GetHostname(name=self.op.name,
2598
                                    family=self.cfg.GetPrimaryIPFamily())
2599

    
2600
    new_name = hostname.name
2601
    self.ip = new_ip = hostname.ip
2602
    old_name = self.cfg.GetClusterName()
2603
    old_ip = self.cfg.GetMasterIP()
2604
    if new_name == old_name and new_ip == old_ip:
2605
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2606
                                 " cluster has changed",
2607
                                 errors.ECODE_INVAL)
2608
    if new_ip != old_ip:
2609
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2610
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2611
                                   " reachable on the network" %
2612
                                   new_ip, errors.ECODE_NOTUNIQUE)
2613

    
2614
    self.op.name = new_name
2615

    
2616
  def Exec(self, feedback_fn):
2617
    """Rename the cluster.
2618

2619
    """
2620
    clustername = self.op.name
2621
    ip = self.ip
2622

    
2623
    # shutdown the master IP
2624
    master = self.cfg.GetMasterNode()
2625
    result = self.rpc.call_node_stop_master(master, False)
2626
    result.Raise("Could not disable the master role")
2627

    
2628
    try:
2629
      cluster = self.cfg.GetClusterInfo()
2630
      cluster.cluster_name = clustername
2631
      cluster.master_ip = ip
2632
      self.cfg.Update(cluster, feedback_fn)
2633

    
2634
      # update the known hosts file
2635
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2636
      node_list = self.cfg.GetOnlineNodeList()
2637
      try:
2638
        node_list.remove(master)
2639
      except ValueError:
2640
        pass
2641
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2642
    finally:
2643
      result = self.rpc.call_node_start_master(master, False, False)
2644
      msg = result.fail_msg
2645
      if msg:
2646
        self.LogWarning("Could not re-enable the master role on"
2647
                        " the master, please restart manually: %s", msg)
2648

    
2649
    return clustername
2650

    
2651

    
2652
class LUSetClusterParams(LogicalUnit):
2653
  """Change the parameters of the cluster.
2654

2655
  """
2656
  HPATH = "cluster-modify"
2657
  HTYPE = constants.HTYPE_CLUSTER
2658
  REQ_BGL = False
2659

    
2660
  def CheckArguments(self):
2661
    """Check parameters
2662

2663
    """
2664
    if self.op.uid_pool:
2665
      uidpool.CheckUidPool(self.op.uid_pool)
2666

    
2667
    if self.op.add_uids:
2668
      uidpool.CheckUidPool(self.op.add_uids)
2669

    
2670
    if self.op.remove_uids:
2671
      uidpool.CheckUidPool(self.op.remove_uids)
2672

    
2673
  def ExpandNames(self):
2674
    # FIXME: in the future maybe other cluster params won't require checking on
2675
    # all nodes to be modified.
2676
    self.needed_locks = {
2677
      locking.LEVEL_NODE: locking.ALL_SET,
2678
    }
2679
    self.share_locks[locking.LEVEL_NODE] = 1
2680

    
2681
  def BuildHooksEnv(self):
2682
    """Build hooks env.
2683

2684
    """
2685
    env = {
2686
      "OP_TARGET": self.cfg.GetClusterName(),
2687
      "NEW_VG_NAME": self.op.vg_name,
2688
      }
2689
    mn = self.cfg.GetMasterNode()
2690
    return env, [mn], [mn]
2691

    
2692
  def CheckPrereq(self):
2693
    """Check prerequisites.
2694

2695
    This checks whether the given params don't conflict and
2696
    if the given volume group is valid.
2697

2698
    """
2699
    if self.op.vg_name is not None and not self.op.vg_name:
2700
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2701
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2702
                                   " instances exist", errors.ECODE_INVAL)
2703

    
2704
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2705
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2706
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2707
                                   " drbd-based instances exist",
2708
                                   errors.ECODE_INVAL)
2709

    
2710
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2711

    
2712
    # if vg_name not None, checks given volume group on all nodes
2713
    if self.op.vg_name:
2714
      vglist = self.rpc.call_vg_list(node_list)
2715
      for node in node_list:
2716
        msg = vglist[node].fail_msg
2717
        if msg:
2718
          # ignoring down node
2719
          self.LogWarning("Error while gathering data on node %s"
2720
                          " (ignoring node): %s", node, msg)
2721
          continue
2722
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2723
                                              self.op.vg_name,
2724
                                              constants.MIN_VG_SIZE)
2725
        if vgstatus:
2726
          raise errors.OpPrereqError("Error on node '%s': %s" %
2727
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2728

    
2729
    if self.op.drbd_helper:
2730
      # checks given drbd helper on all nodes
2731
      helpers = self.rpc.call_drbd_helper(node_list)
2732
      for node in node_list:
2733
        ninfo = self.cfg.GetNodeInfo(node)
2734
        if ninfo.offline:
2735
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2736
          continue
2737
        msg = helpers[node].fail_msg
2738
        if msg:
2739
          raise errors.OpPrereqError("Error checking drbd helper on node"
2740
                                     " '%s': %s" % (node, msg),
2741
                                     errors.ECODE_ENVIRON)
2742
        node_helper = helpers[node].payload
2743
        if node_helper != self.op.drbd_helper:
2744
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2745
                                     (node, node_helper), errors.ECODE_ENVIRON)
2746

    
2747
    self.cluster = cluster = self.cfg.GetClusterInfo()
2748
    # validate params changes
2749
    if self.op.beparams:
2750
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2751
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2752

    
2753
    if self.op.ndparams:
2754
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2755
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2756

    
2757
    if self.op.nicparams:
2758
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2759
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2760
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2761
      nic_errors = []
2762

    
2763
      # check all instances for consistency
2764
      for instance in self.cfg.GetAllInstancesInfo().values():
2765
        for nic_idx, nic in enumerate(instance.nics):
2766
          params_copy = copy.deepcopy(nic.nicparams)
2767
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2768

    
2769
          # check parameter syntax
2770
          try:
2771
            objects.NIC.CheckParameterSyntax(params_filled)
2772
          except errors.ConfigurationError, err:
2773
            nic_errors.append("Instance %s, nic/%d: %s" %
2774
                              (instance.name, nic_idx, err))
2775

    
2776
          # if we're moving instances to routed, check that they have an ip
2777
          target_mode = params_filled[constants.NIC_MODE]
2778
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2779
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2780
                              (instance.name, nic_idx))
2781
      if nic_errors:
2782
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2783
                                   "\n".join(nic_errors))
2784

    
2785
    # hypervisor list/parameters
2786
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2787
    if self.op.hvparams:
2788
      for hv_name, hv_dict in self.op.hvparams.items():
2789
        if hv_name not in self.new_hvparams:
2790
          self.new_hvparams[hv_name] = hv_dict
2791
        else:
2792
          self.new_hvparams[hv_name].update(hv_dict)
2793

    
2794
    # os hypervisor parameters
2795
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2796
    if self.op.os_hvp:
2797
      for os_name, hvs in self.op.os_hvp.items():
2798
        if os_name not in self.new_os_hvp:
2799
          self.new_os_hvp[os_name] = hvs
2800
        else:
2801
          for hv_name, hv_dict in hvs.items():
2802
            if hv_name not in self.new_os_hvp[os_name]:
2803
              self.new_os_hvp[os_name][hv_name] = hv_dict
2804
            else:
2805
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
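
    # Illustrative example of the merge above (hypothetical values): with
    #   cluster.os_hvp = {"lenny-image": {"xen-pvm": {"kernel_path": "..."}}}
    # and
    #   self.op.os_hvp = {"lenny-image": {"xen-pvm": {"root_path": "..."}}}
    # the resulting new_os_hvp["lenny-image"]["xen-pvm"] carries both keys.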
2806

    
2807
    # os parameters
2808
    self.new_osp = objects.FillDict(cluster.osparams, {})
2809
    if self.op.osparams:
2810
      for os_name, osp in self.op.osparams.items():
2811
        if os_name not in self.new_osp:
2812
          self.new_osp[os_name] = {}
2813

    
2814
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2815
                                                  use_none=True)
2816

    
2817
        if not self.new_osp[os_name]:
2818
          # we removed all parameters
2819
          del self.new_osp[os_name]
2820
        else:
2821
          # check the parameter validity (remote check)
2822
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2823
                         os_name, self.new_osp[os_name])
2824

    
2825
    # changes to the hypervisor list
2826
    if self.op.enabled_hypervisors is not None:
2827
      self.hv_list = self.op.enabled_hypervisors
2828
      for hv in self.hv_list:
2829
        # if the hypervisor doesn't already exist in the cluster
2830
        # hvparams, we initialize it to empty, and then (in both
2831
        # cases) we make sure to fill the defaults, as we might not
2832
        # have a complete defaults list if the hypervisor wasn't
2833
        # enabled before
2834
        if hv not in new_hvp:
2835
          new_hvp[hv] = {}
2836
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2837
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2838
    else:
2839
      self.hv_list = cluster.enabled_hypervisors
2840

    
2841
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2842
      # either the enabled list has changed, or the parameters have, validate
2843
      for hv_name, hv_params in self.new_hvparams.items():
2844
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2845
            (self.op.enabled_hypervisors and
2846
             hv_name in self.op.enabled_hypervisors)):
2847
          # either this is a new hypervisor, or its parameters have changed
2848
          hv_class = hypervisor.GetHypervisor(hv_name)
2849
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2850
          hv_class.CheckParameterSyntax(hv_params)
2851
          _CheckHVParams(self, node_list, hv_name, hv_params)
2852

    
2853
    if self.op.os_hvp:
2854
      # no need to check any newly-enabled hypervisors, since the
2855
      # defaults have already been checked in the above code-block
2856
      for os_name, os_hvp in self.new_os_hvp.items():
2857
        for hv_name, hv_params in os_hvp.items():
2858
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2859
          # we need to fill in the new os_hvp on top of the actual hv_p
2860
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2861
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2862
          hv_class = hypervisor.GetHypervisor(hv_name)
2863
          hv_class.CheckParameterSyntax(new_osp)
2864
          _CheckHVParams(self, node_list, hv_name, new_osp)
2865

    
2866
    if self.op.default_iallocator:
2867
      alloc_script = utils.FindFile(self.op.default_iallocator,
2868
                                    constants.IALLOCATOR_SEARCH_PATH,
2869
                                    os.path.isfile)
2870
      if alloc_script is None:
2871
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2872
                                   " specified" % self.op.default_iallocator,
2873
                                   errors.ECODE_INVAL)
2874

    
2875
  def Exec(self, feedback_fn):
2876
    """Change the parameters of the cluster.
2877

2878
    """
2879
    if self.op.vg_name is not None:
2880
      new_volume = self.op.vg_name
2881
      if not new_volume:
2882
        new_volume = None
2883
      if new_volume != self.cfg.GetVGName():
2884
        self.cfg.SetVGName(new_volume)
2885
      else:
2886
        feedback_fn("Cluster LVM configuration already in desired"
2887
                    " state, not changing")
2888
    if self.op.drbd_helper is not None:
2889
      new_helper = self.op.drbd_helper
2890
      if not new_helper:
2891
        new_helper = None
2892
      if new_helper != self.cfg.GetDRBDHelper():
2893
        self.cfg.SetDRBDHelper(new_helper)
2894
      else:
2895
        feedback_fn("Cluster DRBD helper already in desired state,"
2896
                    " not changing")
2897
    if self.op.hvparams:
2898
      self.cluster.hvparams = self.new_hvparams
2899
    if self.op.os_hvp:
2900
      self.cluster.os_hvp = self.new_os_hvp
2901
    if self.op.enabled_hypervisors is not None:
2902
      self.cluster.hvparams = self.new_hvparams
2903
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2904
    if self.op.beparams:
2905
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2906
    if self.op.nicparams:
2907
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2908
    if self.op.osparams:
2909
      self.cluster.osparams = self.new_osp
2910
    if self.op.ndparams:
2911
      self.cluster.ndparams = self.new_ndparams
2912

    
2913
    if self.op.candidate_pool_size is not None:
2914
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2915
      # we need to update the pool size here, otherwise the save will fail
2916
      _AdjustCandidatePool(self, [])
2917

    
2918
    if self.op.maintain_node_health is not None:
2919
      self.cluster.maintain_node_health = self.op.maintain_node_health
2920

    
2921
    if self.op.prealloc_wipe_disks is not None:
2922
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2923

    
2924
    if self.op.add_uids is not None:
2925
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2926

    
2927
    if self.op.remove_uids is not None:
2928
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2929

    
2930
    if self.op.uid_pool is not None:
2931
      self.cluster.uid_pool = self.op.uid_pool
2932

    
2933
    if self.op.default_iallocator is not None:
2934
      self.cluster.default_iallocator = self.op.default_iallocator
2935

    
2936
    if self.op.reserved_lvs is not None:
2937
      self.cluster.reserved_lvs = self.op.reserved_lvs
2938

    
2939
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

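    # A small sketch of the modification lists helper_os expects; the OS names
    # below are hypothetical:
    #   self.op.hidden_os      == [(constants.DDM_ADD, "debian-image")]
    #   self.op.blacklisted_os == [(constants.DDM_REMOVE, "old-image")]
    # i.e. lists of (action, os_name) pairs, applied in order.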
    if self.op.hidden_os:
2957
      helper_os("hidden_os", self.op.hidden_os, "hidden")
2958

    
2959
    if self.op.blacklisted_os:
2960
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2961

    
2962
    if self.op.master_netdev:
2963
      master = self.cfg.GetMasterNode()
2964
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
2965
                  self.cluster.master_netdev)
2966
      result = self.rpc.call_node_stop_master(master, False)
2967
      result.Raise("Could not disable the master ip")
2968
      feedback_fn("Changing master_netdev from %s to %s" %
2969
                  (self.cluster.master_netdev, self.op.master_netdev))
2970
      self.cluster.master_netdev = self.op.master_netdev
2971

    
2972
    self.cfg.Update(self.cluster, feedback_fn)
2973

    
2974
    if self.op.master_netdev:
2975
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
2976
                  self.op.master_netdev)
2977
      result = self.rpc.call_node_start_master(master, False, False)
2978
      if result.fail_msg:
2979
        self.LogWarning("Could not re-enable the master ip on"
2980
                        " the master, please restart manually: %s",
2981
                        result.fail_msg)
2982

    
2983

    
2984
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


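# The LU below only needs to force a configuration write (ConfigWriter then
# pushes the config and ssconf files to all nodes) and reuse the helper above
# for the remaining ancillary files; a minimal equivalent sketch, assuming an
# already constructed LU instance "lu":
#   lu.cfg.Update(lu.cfg.GetClusterInfo(), feedback_fn)
#   _RedistributeAncillaryFiles(lu)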
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
3070
  """Sleep and poll for an instance's disk to sync.
3071

3072
  """
3073
  if not instance.disks or disks is not None and not disks:
3074
    return True
3075

    
3076
  disks = _ExpandCheckDisks(instance, disks)
3077

    
3078
  if not oneshot:
3079
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3080

    
3081
  node = instance.primary_node
3082

    
3083
  for dev in disks:
3084
    lu.cfg.SetDiskID(dev, node)
3085

    
3086
  # TODO: Convert to utils.Retry
3087

    
3088
  retries = 0
3089
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3090
  while True:
3091
    max_time = 0
3092
    done = True
3093
    cumul_degraded = False
3094
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3095
    msg = rstats.fail_msg
3096
    if msg:
3097
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3098
      retries += 1
3099
      if retries >= 10:
3100
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3101
                                 " aborting." % node)
3102
      time.sleep(6)
3103
      continue
3104
    rstats = rstats.payload
3105
    retries = 0
3106
    for i, mstat in enumerate(rstats):
3107
      if mstat is None:
3108
        lu.LogWarning("Can't compute data for node %s/%s",
3109
                           node, disks[i].iv_name)
3110
        continue
3111

    
3112
      cumul_degraded = (cumul_degraded or
3113
                        (mstat.is_degraded and mstat.sync_percent is None))
3114
      if mstat.sync_percent is not None:
3115
        done = False
3116
        if mstat.estimated_time is not None:
3117
          rem_time = ("%s remaining (estimated)" %
3118
                      utils.FormatSeconds(mstat.estimated_time))
3119
          max_time = mstat.estimated_time
3120
        else:
3121
          rem_time = "no time estimate"
3122
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3123
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3124

    
3125
    # if we're done but degraded, let's do a few small retries, to
3126
    # make sure we see a stable and not transient situation; therefore
3127
    # we force restart of the loop
3128
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3129
      logging.info("Degraded disks found, %d retries left", degr_retries)
3130
      degr_retries -= 1
3131
      time.sleep(1)
3132
      continue
3133

    
3134
    if done or oneshot:
3135
      break
3136

    
3137
    time.sleep(min(60, max_time))
3138

    
3139
  if done:
3140
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3141
  return not cumul_degraded
3142

    
3143

    
3144
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False

  def CheckPrereq(self):
3185
    """Check prerequisites.
3186

3187
    This checks:
3188
     - the node exists in the configuration
3189
     - OOB is supported
3190

3191
    Any errors are signaled by raising errors.OpPrereqError.
3192

3193
    """
3194
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3195
    node = self.cfg.GetNodeInfo(self.op.node_name)
3196

    
3197
    if node is None:
3198
      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
3199

    
3200
    self.oob_program = _SupportsOob(self.cfg, node)
3201

    
3202
    if not self.oob_program:
3203
      raise errors.OpPrereqError("OOB is not supported for node %s" %
3204
                                 self.op.node_name)
3205

    
3206
    if self.op.command == constants.OOB_POWER_OFF and not node.offline:
3207
      raise errors.OpPrereqError(("Cannot power off node %s because it is"
3208
                                  " not marked offline") % self.op.node_name)
3209

    
3210
    self.node = node
3211

    
3212
  def ExpandNames(self):
3213
    """Gather locks we need.
3214

3215
    """
3216
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3217
    self.needed_locks = {
3218
      locking.LEVEL_NODE: [node_name],
3219
      }
3220

    
3221
  def Exec(self, feedback_fn):
3222
    """Execute OOB and return result if we expect any.
3223

3224
    """
3225
    master_node = self.cfg.GetMasterNode()
3226
    node = self.node
3227

    
3228
    logging.info("Executing out-of-band command '%s' using '%s' on %s",
3229
                 self.op.command, self.oob_program, self.op.node_name)
3230
    result = self.rpc.call_run_oob(master_node, self.oob_program,
3231
                                   self.op.command, self.op.node_name,
3232
                                   self.op.timeout)
3233

    
3234
    result.Raise("An error occurred on execution of OOB helper")
3235

    
3236
    self._CheckPayload(result)
3237

    
3238
    if self.op.command == constants.OOB_HEALTH:
3239
      # For health we should log important events
3240
      for item, status in result.payload:
3241
        if status in [constants.OOB_STATUS_WARNING,
3242
                      constants.OOB_STATUS_CRITICAL]:
3243
          logging.warning("On node '%s' item '%s' has status '%s'",
3244
                          self.op.node_name, item, status)
3245

    
3246
    if self.op.command == constants.OOB_POWER_ON:
3247
      node.powered = True
3248
    elif self.op.command == constants.OOB_POWER_OFF:
3249
      node.powered = False
3250
    elif self.op.command == constants.OOB_POWER_STATUS:
3251
      powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3252
      if powered != self.node.powered:
3253
        logging.warning(("Recorded power state (%s) of node '%s' does not match"
3254
                         " actual power state (%s)"), node.powered,
3255
                        self.op.node_name, powered)
3256

    
3257
    self.cfg.Update(node, feedback_fn)
3258

    
3259
    return result.payload
3260

    
3261
  def _CheckPayload(self, result):
3262
    """Checks if the payload is valid.
3263

3264
    @param result: RPC result
3265
    @raises errors.OpExecError: If payload is not valid
3266

3267
    """
3268
    errs = []
3269
    if self.op.command == constants.OOB_HEALTH:
3270
      if not isinstance(result.payload, list):
3271
        errs.append("command 'health' is expected to return a list but got %s" %
3272
                    type(result.payload))
3273
      for item, status in result.payload:
3274
        if status not in constants.OOB_STATUSES:
3275
          errs.append("health item '%s' has invalid status '%s'" %
3276
                      (item, status))
3277

    
3278
    if self.op.command == constants.OOB_POWER_STATUS:
3279
      if not isinstance(result.payload, dict):
3280
        errs.append("power-status is expected to return a dict but got %s" %
3281
                    type(result.payload))
3282

    
3283
    if self.op.command in [
3284
        constants.OOB_POWER_ON,
3285
        constants.OOB_POWER_OFF,
3286
        constants.OOB_POWER_CYCLE,
3287
        ]:
3288
      if result.payload is not None:
3289
        errs.append("%s is expected to not return payload but got '%s'" %
3290
                    (self.op.command, result.payload))
3291

    
3292
    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
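    # Illustrative payload shapes accepted by the checks above; the item name
    # and boolean value are hypothetical:
    #   OOB_HEALTH:       [("disk0", constants.OOB_STATUS_WARNING), ...]
    #   OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True}
    #   OOB_POWER_ON / OOB_POWER_OFF / OOB_POWER_CYCLE: no payload (None)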

    
3296

    
3297

    
3298
class LUDiagnoseOS(NoHooksLU):
3299
  """Logical unit for OS diagnose/query.
3300

3301
  """
3302
  REQ_BGL = False
3303
  _HID = "hidden"
3304
  _BLK = "blacklisted"
3305
  _VLD = "valid"
3306
  _FIELDS_STATIC = utils.FieldSet()
3307
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3308
                                   "parameters", "api_versions", _HID, _BLK)
3309

    
3310
  def CheckArguments(self):
3311
    if self.op.names:
3312
      raise errors.OpPrereqError("Selective OS query not supported",
3313
                                 errors.ECODE_INVAL)
3314

    
3315
    _CheckOutputFields(static=self._FIELDS_STATIC,
3316
                       dynamic=self._FIELDS_DYNAMIC,
3317
                       selected=self.op.output_fields)
3318

    
3319
  def ExpandNames(self):
3320
    # Lock all nodes, in shared mode
3321
    # Temporary removal of locks, should be reverted later
3322
    # TODO: reintroduce locks when they are lighter-weight
3323
    self.needed_locks = {}
3324
    #self.share_locks[locking.LEVEL_NODE] = 1
3325
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3326

    
3327
  @staticmethod
3328
  def _DiagnoseByOS(rlist):
3329
    """Remaps a per-node return list into an a per-os per-node dictionary
3330

3331
    @param rlist: a map with node names as keys and OS objects as values
3332

3333
    @rtype: dict
3334
    @return: a dictionary with osnames as keys and as value another
3335
        map, with nodes as keys and tuples of (path, status, diagnose,
3336
        variants, parameters, api_versions) as values, eg::
3337

3338
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3339
                                     (/srv/..., False, "invalid api")],
3340
                           "node2": [(/srv/..., True, "", [], [])]}
3341
          }
3342

3343
    """
3344
    all_os = {}
3345
    # we build here the list of nodes that didn't fail the RPC (at RPC
3346
    # level), so that nodes with a non-responding node daemon don't
3347
    # make all OSes invalid
3348
    good_nodes = [node_name for node_name in rlist
3349
                  if not rlist[node_name].fail_msg]
3350
    for node_name, nr in rlist.items():
3351
      if nr.fail_msg or not nr.payload:
3352
        continue
3353
      for (name, path, status, diagnose, variants,
3354
           params, api_versions) in nr.payload:
3355
        if name not in all_os:
3356
          # build a list of nodes for this os containing empty lists
3357
          # for each node in node_list
3358
          all_os[name] = {}
3359
          for nname in good_nodes:
3360
            all_os[name][nname] = []
3361
        # convert params from [name, help] to (name, help)
3362
        params = [tuple(v) for v in params]
3363
        all_os[name][node_name].append((path, status, diagnose,
3364
                                        variants, params, api_versions))
3365
    return all_os
3366

    
3367
  def Exec(self, feedback_fn):
3368
    """Compute the list of OSes.
3369

3370
    """
3371
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3372
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3373
    pol = self._DiagnoseByOS(node_data)
3374
    output = []
3375
    cluster = self.cfg.GetClusterInfo()
3376

    
3377
    for os_name in utils.NiceSort(pol.keys()):
3378
      os_data = pol[os_name]
3379
      row = []
3380
      valid = True
3381
      (variants, params, api_versions) = null_state = (set(), set(), set())
3382
      for idx, osl in enumerate(os_data.values()):
3383
        valid = bool(valid and osl and osl[0][1])
3384
        if not valid:
3385
          (variants, params, api_versions) = null_state
3386
          break
3387
        node_variants, node_params, node_api = osl[0][3:6]
3388
        if idx == 0: # first entry
3389
          variants = set(node_variants)
3390
          params = set(node_params)
3391
          api_versions = set(node_api)
3392
        else: # keep consistency
3393
          variants.intersection_update(node_variants)
3394
          params.intersection_update(node_params)
3395
          api_versions.intersection_update(node_api)
3396

    
3397
      is_hid = os_name in cluster.hidden_os
3398
      is_blk = os_name in cluster.blacklisted_os
3399
      if ((self._HID not in self.op.output_fields and is_hid) or
3400
          (self._BLK not in self.op.output_fields and is_blk) or
3401
          (self._VLD not in self.op.output_fields and not valid)):
3402
        continue
3403

    
3404
      for field in self.op.output_fields:
3405
        if field == "name":
3406
          val = os_name
3407
        elif field == self._VLD:
3408
          val = valid
3409
        elif field == "node_status":
3410
          # this is just a copy of the dict
3411
          val = {}
3412
          for node_name, nos_list in os_data.items():
3413
            val[node_name] = nos_list
3414
        elif field == "variants":
3415
          val = utils.NiceSort(list(variants))
3416
        elif field == "parameters":
3417
          val = list(params)
3418
        elif field == "api_versions":
3419
          val = list(api_versions)
3420
        elif field == self._HID:
3421
          val = is_hid
3422
        elif field == self._BLK:
3423
          val = is_blk
3424
        else:
3425
          raise errors.ParameterError(field)
3426
        row.append(val)
3427
      output.append(row)
3428

    
3429
    return output
3430

    
3431

    
3432
class LURemoveNode(LogicalUnit):
3433
  """Logical unit for removing a node.
3434

3435
  """
3436
  HPATH = "node-remove"
3437
  HTYPE = constants.HTYPE_NODE
3438

    
3439
  def BuildHooksEnv(self):
3440
    """Build hooks env.
3441

3442
    This doesn't run on the target node in the pre phase as a failed
3443
    node would then be impossible to remove.
3444

3445
    """
3446
    env = {
3447
      "OP_TARGET": self.op.node_name,
3448
      "NODE_NAME": self.op.node_name,
3449
      }
3450
    all_nodes = self.cfg.GetNodeList()
3451
    try:
3452
      all_nodes.remove(self.op.node_name)
3453
    except ValueError:
3454
      logging.warning("Node %s which is about to be removed not found"
3455
                      " in the all nodes list", self.op.node_name)
3456
    return env, all_nodes, all_nodes
3457

    
3458
  def CheckPrereq(self):
3459
    """Check prerequisites.
3460

3461
    This checks:
3462
     - the node exists in the configuration
3463
     - it does not have primary or secondary instances
3464
     - it's not the master
3465

3466
    Any errors are signaled by raising errors.OpPrereqError.
3467

3468
    """
3469
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3470
    node = self.cfg.GetNodeInfo(self.op.node_name)
3471
    assert node is not None
3472

    
3473
    instance_list = self.cfg.GetInstanceList()
3474

    
3475
    masternode = self.cfg.GetMasterNode()
3476
    if node.name == masternode:
3477
      raise errors.OpPrereqError("Node is the master node,"
3478
                                 " you need to failover first.",
3479
                                 errors.ECODE_INVAL)
3480

    
3481
    for instance_name in instance_list:
3482
      instance = self.cfg.GetInstanceInfo(instance_name)
3483
      if node.name in instance.all_nodes:
3484
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3485
                                   " please remove first." % instance_name,
3486
                                   errors.ECODE_INVAL)
3487
    self.op.node_name = node.name
3488
    self.node = node
3489

    
3490
  def Exec(self, feedback_fn):
3491
    """Removes the node from the cluster.
3492

3493
    """
3494
    node = self.node
3495
    logging.info("Stopping the node daemon and removing configs from node %s",
3496
                 node.name)
3497

    
3498
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3499

    
3500
    # Promote nodes to master candidate as needed
3501
    _AdjustCandidatePool(self, exceptions=[node.name])
3502
    self.context.RemoveNode(node.name)
3503

    
3504
    # Run post hooks on the node before it's removed
3505
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3506
    try:
3507
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3508
    except:
3509
      # pylint: disable-msg=W0702
3510
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3511

    
3512
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3513
    msg = result.fail_msg
3514
    if msg:
3515
      self.LogWarning("Errors encountered on the remote node while leaving"
3516
                      " the cluster: %s", msg)
3517

    
3518
    # Remove node from our /etc/hosts
3519
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3520
      master_node = self.cfg.GetMasterNode()
3521
      result = self.rpc.call_etc_hosts_modify(master_node,
3522
                                              constants.ETC_HOSTS_REMOVE,
3523
                                              node.name, None)
3524
      result.Raise("Can't update hosts file with new host data")
3525
      _RedistributeAncillaryFiles(self)
3526

    
3527

    
3528
class _NodeQuery(_QueryBase):
3529
  FIELDS = query.NODE_FIELDS
3530

    
3531
  def ExpandNames(self, lu):
3532
    lu.needed_locks = {}
3533
    lu.share_locks[locking.LEVEL_NODE] = 1
3534

    
3535
    if self.names:
3536
      self.wanted = _GetWantedNodes(lu, self.names)
3537
    else:
3538
      self.wanted = locking.ALL_SET
3539

    
3540
    self.do_locking = (self.use_locking and
3541
                       query.NQ_LIVE in self.requested_data)
3542

    
3543
    if self.do_locking:
3544
      # if we don't request only static fields, we need to lock the nodes
3545
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3546

    
3547
  def DeclareLocks(self, lu, level):
3548
    pass
3549

    
3550
  def _GetQueryData(self, lu):
3551
    """Computes the list of nodes and their attributes.
3552

3553
    """
3554
    all_info = lu.cfg.GetAllNodesInfo()
3555

    
3556
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3557

    
3558
    # Gather data as requested
3559
    if query.NQ_LIVE in self.requested_data:
3560
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3561
                                        lu.cfg.GetHypervisorType())
3562
      live_data = dict((name, nresult.payload)
3563
                       for (name, nresult) in node_data.items()
3564
                       if not nresult.fail_msg and nresult.payload)
3565
    else:
3566
      live_data = None
3567

    
3568
    if query.NQ_INST in self.requested_data:
3569
      node_to_primary = dict([(name, set()) for name in nodenames])
3570
      node_to_secondary = dict([(name, set()) for name in nodenames])
3571

    
3572
      inst_data = lu.cfg.GetAllInstancesInfo()
3573

    
3574
      for inst in inst_data.values():
3575
        if inst.primary_node in node_to_primary:
3576
          node_to_primary[inst.primary_node].add(inst.name)
3577
        for secnode in inst.secondary_nodes:
3578
          if secnode in node_to_secondary:
3579
            node_to_secondary[secnode].add(inst.name)
3580
    else:
3581
      node_to_primary = None
3582
      node_to_secondary = None
3583

    
3584
    if query.NQ_OOB in self.requested_data:
3585
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3586
                         for name, node in all_info.iteritems())
3587
    else:
3588
      oob_support = None
3589

    
3590
    if query.NQ_GROUP in self.requested_data:
3591
      groups = lu.cfg.GetAllNodeGroupsInfo()
3592
    else:
3593
      groups = {}
3594

    
3595
    return query.NodeQueryData([all_info[name] for name in nodenames],
3596
                               live_data, lu.cfg.GetMasterNode(),
3597
                               node_to_primary, node_to_secondary, groups,
3598
                               oob_support, lu.cfg.GetClusterInfo())
3599

    
3600

    
3601
class LUQueryNodes(NoHooksLU):
3602
  """Logical unit for querying nodes.
3603

3604
  """
3605
  # pylint: disable-msg=W0142
3606
  REQ_BGL = False
3607

    
3608
  def CheckArguments(self):
3609
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3610
                         self.op.use_locking)
3611

    
3612
  def ExpandNames(self):
3613
    self.nq.ExpandNames(self)
3614

    
3615
  def Exec(self, feedback_fn):
3616
    return self.nq.OldStyleQuery(self)
3617

    
3618

    
3619
class LUQueryNodeVolumes(NoHooksLU):
3620
  """Logical unit for getting volumes on node(s).
3621

3622
  """
3623
  REQ_BGL = False
3624
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3625
  _FIELDS_STATIC = utils.FieldSet("node")
3626

    
3627
  def CheckArguments(self):
3628
    _CheckOutputFields(static=self._FIELDS_STATIC,
3629
                       dynamic=self._FIELDS_DYNAMIC,
3630
                       selected=self.op.output_fields)
3631

    
3632
  def ExpandNames(self):
3633
    self.needed_locks = {}
3634
    self.share_locks[locking.LEVEL_NODE] = 1
3635
    if not self.op.nodes:
3636
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3637
    else:
3638
      self.needed_locks[locking.LEVEL_NODE] = \
3639
        _GetWantedNodes(self, self.op.nodes)
3640

    
3641
  def Exec(self, feedback_fn):
3642
    """Computes the list of nodes and their attributes.
3643

3644
    """
3645
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3646
    volumes = self.rpc.call_node_volumes(nodenames)
3647

    
3648
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3649
             in self.cfg.GetInstanceList()]
3650

    
3651
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3652

    
3653
    output = []
3654
    for node in nodenames:
3655
      nresult = volumes[node]
3656
      if nresult.offline:
3657
        continue
3658
      msg = nresult.fail_msg
3659
      if msg:
3660
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3661
        continue
3662

    
3663
      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

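      # Each entry of node_vols is expected to be a dict providing at least
      # the keys used below; a purely illustrative example:
      #   {'dev': '/dev/xenvg/lv-0', 'vg': 'xenvg', 'name': 'lv-0',
      #    'size': 10240}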
      for vol in node_vols:
3667
        node_output = []
3668
        for field in self.op.output_fields:
3669
          if field == "node":
3670
            val = node
3671
          elif field == "phys":
3672
            val = vol['dev']
3673
          elif field == "vg":
3674
            val = vol['vg']
3675
          elif field == "name":
3676
            val = vol['name']
3677
          elif field == "size":
3678
            val = int(float(vol['size']))
3679
          elif field == "instance":
3680
            for inst in ilist:
3681
              if node not in lv_by_node[inst]:
3682
                continue
3683
              if vol['name'] in lv_by_node[inst][node]:
3684
                val = inst.name
3685
                break
3686
            else:
3687
              val = '-'
3688
          else:
3689
            raise errors.ParameterError(field)
3690
          node_output.append(str(val))
3691

    
3692
        output.append(node_output)
3693

    
3694
    return output
3695

    
3696

    
3697
class LUQueryNodeStorage(NoHooksLU):
3698
  """Logical unit for getting information on storage units on node(s).
3699

3700
  """
3701
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3702
  REQ_BGL = False
3703

    
3704
  def CheckArguments(self):
3705
    _CheckOutputFields(static=self._FIELDS_STATIC,
3706
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3707
                       selected=self.op.output_fields)
3708

    
3709
  def ExpandNames(self):
3710
    self.needed_locks = {}
3711
    self.share_locks[locking.LEVEL_NODE] = 1
3712

    
3713
    if self.op.nodes:
3714
      self.needed_locks[locking.LEVEL_NODE] = \
3715
        _GetWantedNodes(self, self.op.nodes)
3716
    else:
3717
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3718

    
3719
  def Exec(self, feedback_fn):
3720
    """Computes the list of nodes and their attributes.
3721

3722
    """
3723
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3724

    
3725
    # Always get name to sort by
3726
    if constants.SF_NAME in self.op.output_fields:
3727
      fields = self.op.output_fields[:]
3728
    else:
3729
      fields = [constants.SF_NAME] + self.op.output_fields
3730

    
3731
    # Never ask for node or type as it's only known to the LU
3732
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3733
      while extra in fields:
3734
        fields.remove(extra)
3735

    
3736
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3737
    name_idx = field_idx[constants.SF_NAME]
3738

    
3739
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3740
    data = self.rpc.call_storage_list(self.nodes,
3741
                                      self.op.storage_type, st_args,
3742
                                      self.op.name, fields)
3743

    
3744
    result = []
3745

    
3746
    for node in utils.NiceSort(self.nodes):
3747
      nresult = data[node]
3748
      if nresult.offline:
3749
        continue
3750

    
3751
      msg = nresult.fail_msg
3752
      if msg:
3753
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3754
        continue
3755

    
3756
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3757

    
3758
      for name in utils.NiceSort(rows.keys()):
3759
        row = rows[name]
3760

    
3761
        out = []
3762

    
3763
        for field in self.op.output_fields:
3764
          if field == constants.SF_NODE:
3765
            val = node
3766
          elif field == constants.SF_TYPE:
3767
            val = self.op.storage_type
3768
          elif field in field_idx:
3769
            val = row[field_idx[field]]
3770
          else:
3771
            raise errors.ParameterError(field)
3772

    
3773
          out.append(val)
3774

    
3775
        result.append(out)
3776

    
3777
    return result
3778

    
3779

    
3780
class _InstanceQuery(_QueryBase):
3781
  FIELDS = query.INSTANCE_FIELDS
3782

    
3783
  def ExpandNames(self, lu):
3784
    lu.needed_locks = {}
3785
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
3786
    lu.share_locks[locking.LEVEL_NODE] = 1
3787

    
3788
    if self.names:
3789
      self.wanted = _GetWantedInstances(lu, self.names)
3790
    else:
3791
      self.wanted = locking.ALL_SET
3792

    
3793
    self.do_locking = (self.use_locking and
3794
                       query.IQ_LIVE in self.requested_data)
3795
    if self.do_locking:
3796
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3797
      lu.needed_locks[locking.LEVEL_NODE] = []
3798
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3799

    
3800
  def DeclareLocks(self, lu, level):
3801
    if level == locking.LEVEL_NODE and self.do_locking:
3802
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
3803

    
3804
  def _GetQueryData(self, lu):
3805
    """Computes the list of instances and their attributes.
3806

3807
    """
3808
    all_info = lu.cfg.GetAllInstancesInfo()
3809

    
3810
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3811

    
3812
    instance_list = [all_info[name] for name in instance_names]
3813
    nodes = frozenset([inst.primary_node for inst in instance_list])
3814
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3815
    bad_nodes = []
3816
    offline_nodes = []
3817

    
3818
    # Gather data as requested
3819
    if query.IQ_LIVE in self.requested_data:
3820
      live_data = {}
3821
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3822
      for name in nodes:
3823
        result = node_data[name]
3824
        if result.offline:
3825
          # offline nodes will be in both lists
3826
          assert result.fail_msg
3827
          offline_nodes.append(name)
3828
        if result.fail_msg:
3829
          bad_nodes.append(name)
3830
        elif result.payload:
3831
          live_data.update(result.payload)
3832
        # else no instance is alive
3833
    else:
3834
      live_data = {}
3835

    
3836
    if query.IQ_DISKUSAGE in self.requested_data:
3837
      disk_usage = dict((inst.name,
3838
                         _ComputeDiskSize(inst.disk_template,
3839
                                          [{"size": disk.size}
3840
                                           for disk in inst.disks]))
3841
                        for inst in instance_list)
3842
    else:
3843
      disk_usage = None
3844

    
3845
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3846
                                   disk_usage, offline_nodes, bad_nodes,
3847
                                   live_data)
3848

    
3849

    
3850
class LUQuery(NoHooksLU):
3851
  """Query for resources/items of a certain kind.
3852

3853
  """
3854
  # pylint: disable-msg=W0142
3855
  REQ_BGL = False
3856

    
3857
  def CheckArguments(self):
3858
    qcls = _GetQueryImplementation(self.op.what)
3859
    names = qlang.ReadSimpleFilter("name", self.op.filter)
3860

    
3861
    self.impl = qcls(names, self.op.fields, False)
3862

    
3863
  def ExpandNames(self):
3864
    self.impl.ExpandNames(self)
3865

    
3866
  def DeclareLocks(self, level):
3867
    self.impl.DeclareLocks(self, level)
3868

    
3869
  def Exec(self, feedback_fn):
3870
    return self.impl.NewStyleQuery(self)
3871

    
3872

    
3873
class LUQueryFields(NoHooksLU):
3874
  """Query for resources/items of a certain kind.
3875

3876
  """
3877
  # pylint: disable-msg=W0142
3878
  REQ_BGL = False
3879

    
3880
  def CheckArguments(self):
3881
    self.qcls = _GetQueryImplementation(self.op.what)
3882

    
3883
  def ExpandNames(self):
3884
    self.needed_locks = {}
3885

    
3886
  def Exec(self, feedback_fn):
3887
    return self.qcls.FieldsQuery(self.op.fields)
3888

    
3889

    
3890
class LUModifyNodeStorage(NoHooksLU):
3891
  """Logical unit for modifying a storage volume on a node.
3892

3893
  """
3894
  REQ_BGL = False
3895

    
3896
  def CheckArguments(self):
3897
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3898

    
3899
    storage_type = self.op.storage_type
3900

    
3901
    try:
3902
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3903
    except KeyError:
3904
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3905
                                 " modified" % storage_type,
3906
                                 errors.ECODE_INVAL)
3907

    
3908
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

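    # Sketch of a request that would pass this check, assuming an LVM
    # physical-volume storage type whose only modifiable field is
    # "allocatable" (the constants named here are an assumption):
    #   self.op.storage_type == constants.ST_LVM_PV
    #   self.op.changes      == {constants.SF_ALLOCATABLE: False}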
  def ExpandNames(self):
3916
    self.needed_locks = {
3917
      locking.LEVEL_NODE: self.op.node_name,
3918
      }
3919

    
3920
  def Exec(self, feedback_fn):
3921
    """Computes the list of nodes and their attributes.
3922

3923
    """
3924
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3925
    result = self.rpc.call_storage_modify(self.op.node_name,
3926
                                          self.op.storage_type, st_args,
3927
                                          self.op.name, self.op.changes)
3928
    result.Raise("Failed to modify storage unit '%s' on %s" %
3929
                 (self.op.name, self.op.node_name))
3930

    
3931

    
3932
class LUAddNode(LogicalUnit):
3933
  """Logical unit for adding node to the cluster.
3934

3935
  """
3936
  HPATH = "node-add"
3937
  HTYPE = constants.HTYPE_NODE
3938
  _NFLAGS = ["master_capable", "vm_capable"]
3939

    
3940
  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

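  # Illustrative effect of the normalization above (host name and address are
  # hypothetical): a request with node_name="node4" on an IPv4 cluster would
  # end up with self.hostname.name == "node4.example.com",
  # self.hostname.ip == "192.0.2.14" and self.op.node_name set to that FQDN.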
  def BuildHooksEnv(self):
3951
    """Build hooks env.
3952

3953
    This will run on all nodes before, and on all nodes + the new node after.
3954

3955
    """
3956
    env = {
3957
      "OP_TARGET": self.op.node_name,
3958
      "NODE_NAME": self.op.node_name,
3959
      "NODE_PIP": self.op.primary_ip,
3960
      "NODE_SIP": self.op.secondary_ip,
3961
      "MASTER_CAPABLE": str(self.op.master_capable),
3962
      "VM_CAPABLE": str(self.op.vm_capable),
3963
      }
3964
    nodes_0 = self.cfg.GetNodeList()
3965
    nodes_1 = nodes_0 + [self.op.node_name, ]
3966
    return env, nodes_0, nodes_1
3967

    
3968
  def CheckPrereq(self):
3969
    """Check prerequisites.
3970

3971
    This checks:
3972
     - the new node is not already in the config
3973
     - it is resolvable
3974
     - its parameters (single/dual homed) matches the cluster
3975

3976
    Any errors are signaled by raising errors.OpPrereqError.
3977

3978
    """
3979
    cfg = self.cfg
3980
    hostname = self.hostname
3981
    node = hostname.name
3982
    primary_ip = self.op.primary_ip = hostname.ip
3983
    if self.op.secondary_ip is None:
3984
      if self.primary_ip_family == netutils.IP6Address.family:
3985
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3986
                                   " IPv4 address must be given as secondary",
3987
                                   errors.ECODE_INVAL)
3988
      self.op.secondary_ip = primary_ip
3989

    
3990
    secondary_ip = self.op.secondary_ip
3991
    if not netutils.IP4Address.IsValid(secondary_ip):
3992
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3993
                                 " address" % secondary_ip, errors.ECODE_INVAL)
3994

    
3995
    node_list = cfg.GetNodeList()
3996
    if not self.op.readd and node in node_list:
3997
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3998
                                 node, errors.ECODE_EXISTS)
3999
    elif self.op.readd and node not in node_list:
4000
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4001
                                 errors.ECODE_NOENT)
4002

    
4003
    self.changed_primary_ip = False
4004

    
4005
    for existing_node_name in node_list:
4006
      existing_node = cfg.GetNodeInfo(existing_node_name)
4007

    
4008
      if self.op.readd and node == existing_node_name:
4009
        if existing_node.secondary_ip != secondary_ip:
4010
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4011
                                     " address configuration as before",
4012
                                     errors.ECODE_INVAL)
4013
        if existing_node.primary_ip != primary_ip:
4014
          self.changed_primary_ip = True
4015

    
4016
        continue
4017

    
4018
      if (existing_node.primary_ip == primary_ip or
4019
          existing_node.secondary_ip == primary_ip or
4020
          existing_node.primary_ip == secondary_ip or
4021
          existing_node.secondary_ip == secondary_ip):
4022
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4023
                                   " existing node %s" % existing_node.name,
4024
                                   errors.ECODE_NOTUNIQUE)
4025

    
4026
    # After this 'if' block, None is no longer a valid value for the
4027
    # _capable op attributes
4028
    if self.op.readd:
4029
      old_node = self.cfg.GetNodeInfo(node)
4030
      assert old_node is not None, "Can't retrieve locked node %s" % node
4031
      for attr in self._NFLAGS:
4032
        if getattr(self.op, attr) is None:
4033
          setattr(self.op, attr, getattr(old_node, attr))
4034
    else:
4035
      for attr in self._NFLAGS:
4036
        if getattr(self.op, attr) is None:
4037
          setattr(self.op, attr, True)
4038

    
4039
    if self.op.readd and not self.op.vm_capable:
4040
      pri, sec = cfg.GetNodeInstances(node)
4041
      if pri or sec:
4042
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4043
                                   " flag set to false, but it already holds"
4044
                                   " instances" % node,
4045
                                   errors.ECODE_STATE)
4046

    
4047
    # check that the type of the node (single versus dual homed) is the
4048
    # same as for the master
4049
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4050
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4051
    newbie_singlehomed = secondary_ip == primary_ip
4052
    if master_singlehomed != newbie_singlehomed:
4053
      if master_singlehomed:
4054
        raise errors.OpPrereqError("The master has no secondary ip but the"
4055
                                   " new node has one",
4056
                                   errors.ECODE_INVAL)
4057
      else:
4058
        raise errors.OpPrereqError("The master has a secondary ip but the"
4059
                                   " new node doesn't have one",
4060
                                   errors.ECODE_INVAL)
4061

    
4062
    # checks reachability
4063
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4064
      raise errors.OpPrereqError("Node not reachable by ping",
4065
                                 errors.ECODE_ENVIRON)
4066

    
4067
    if not newbie_singlehomed:
4068
      # check reachability from my secondary ip to newbie's secondary ip
4069
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4070
                           source=myself.secondary_ip):
4071
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4072
                                   " based ping to node daemon port",
4073
                                   errors.ECODE_ENVIRON)
4074

    
4075
    if self.op.readd:
4076
      exceptions = [node]
4077
    else:
4078
      exceptions = []
4079

    
4080
    if self.op.master_capable:
4081
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4082
    else:
4083
      self.master_candidate = False
4084

    
4085
    if self.op.readd:
4086
      self.new_node = old_node
4087
    else:
4088
      node_group = cfg.LookupNodeGroup(self.op.group)
4089
      self.new_node = objects.Node(name=node,
4090
                                   primary_ip=primary_ip,
4091
                                   secondary_ip=secondary_ip,
4092
                                   master_candidate=self.master_candidate,
4093
                                   offline=False, drained=False,
4094
                                   group=node_group)
4095

    
4096
    if self.op.ndparams:
4097
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4098

    
4099
  def Exec(self, feedback_fn):
4100
    """Adds the new node to the cluster.
4101

4102
    """
4103
    new_node = self.new_node
4104
    node = new_node.name
4105

    
4106
    # We are adding a new node, so we assume it's powered
    new_node.powered = True
4108

    
4109
    # for re-adds, reset the offline/drained/master-candidate flags;
4110
    # we need to reset here, otherwise offline would prevent RPC calls
4111
    # later in the procedure; this also means that if the re-add
4112
    # fails, we are left with a non-offlined, broken node
4113
    if self.op.readd:
4114
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4115
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4116
      # if we demote the node, we do cleanup later in the procedure
4117
      new_node.master_candidate = self.master_candidate
4118
      if self.changed_primary_ip:
4119
        new_node.primary_ip = self.op.primary_ip
4120

    
4121
    # copy the master/vm_capable flags
4122
    for attr in self._NFLAGS:
4123
      setattr(new_node, attr, getattr(self.op, attr))
4124

    
4125
    # notify the user about any possible mc promotion
4126
    if new_node.master_candidate:
4127
      self.LogInfo("Node will be a master candidate")
4128

    
4129
    if self.op.ndparams:
4130
      new_node.ndparams = self.op.ndparams
4131
    else:
4132
      new_node.ndparams = {}
4133

    
4134
    # check connectivity
4135
    result = self.rpc.call_version([node])[node]
4136
    result.Raise("Can't get version information from node %s" % node)
4137
    if constants.PROTOCOL_VERSION == result.payload:
4138
      logging.info("Communication to node %s fine, sw version %s match",
4139
                   node, result.payload)
4140
    else:
4141
      raise errors.OpExecError("Version mismatch master version %s,"
4142
                               " node version %s" %
4143
                               (constants.PROTOCOL_VERSION, result.payload))
4144

    
4145
    # Add node to our /etc/hosts, and add key to known_hosts
4146
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4147
      master_node = self.cfg.GetMasterNode()
4148
      result = self.rpc.call_etc_hosts_modify(master_node,
4149
                                              constants.ETC_HOSTS_ADD,
4150
                                              self.hostname.name,
4151
                                              self.hostname.ip)
4152
      result.Raise("Can't update hosts file with new host data")
4153

    
4154
    if new_node.secondary_ip != new_node.primary_ip:
4155
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4156
                               False)
4157

    
4158
    node_verify_list = [self.cfg.GetMasterNode()]
4159
    node_verify_param = {
4160
      constants.NV_NODELIST: [node],
4161
      # TODO: do a node-net-test as well?
4162
    }
4163

    
4164
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4165
                                       self.cfg.GetClusterName())
4166
    for verifier in node_verify_list:
4167
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4168
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4169
      if nl_payload:
4170
        for failed in nl_payload:
4171
          feedback_fn("ssh/hostname verification failed"
4172
                      " (checking from %s): %s" %
4173
                      (verifier, nl_payload[failed]))
4174
        raise errors.OpExecError("ssh/hostname verification failed.")
4175

    
4176
    if self.op.readd:
4177
      _RedistributeAncillaryFiles(self)
4178
      self.context.ReaddNode(new_node)
4179
      # make sure we redistribute the config
4180
      self.cfg.Update(new_node, feedback_fn)
4181
      # and make sure the new node will not have old files around
4182
      if not new_node.master_candidate:
4183
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4184
        msg = result.fail_msg
4185
        if msg:
4186
          self.LogWarning("Node failed to demote itself from master"
4187
                          " candidate status: %s" % msg)
4188
    else:
4189
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4190
                                  additional_vm=self.op.vm_capable)
4191
      self.context.AddNode(new_node, self.proc.GetECId())
4192

    
4193

    
4194
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

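  # A quick sketch of how the two mappings relate; flags are ordered as
  # (master_candidate, drained, offline):
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
  #   _R2F[_ROLE_OFFLINE]        == (False, False, True)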
  def CheckArguments(self):
4217
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4218
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4219
                self.op.master_capable, self.op.vm_capable,
4220
                self.op.secondary_ip, self.op.ndparams]
4221
    if all_mods.count(None) == len(all_mods):
4222
      raise errors.OpPrereqError("Please pass at least one modification",
4223
                                 errors.ECODE_INVAL)
4224
    if all_mods.count(True) > 1:
4225
      raise errors.OpPrereqError("Can't set the node into more than one"
4226
                                 " state at the same time",
4227
                                 errors.ECODE_INVAL)
4228

    
4229
    # Boolean value that tells us whether we might be demoting from MC
4230
    self.might_demote = (self.op.master_candidate == False or
4231
                         self.op.offline == True or
4232
                         self.op.drained == True or
4233
                         self.op.master_capable == False)
4234

    
4235
    if self.op.secondary_ip:
4236
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4237
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4238
                                   " address" % self.op.secondary_ip,
4239
                                   errors.ECODE_INVAL)
4240

    
4241
    self.lock_all = self.op.auto_promote and self.might_demote
4242
    self.lock_instances = self.op.secondary_ip is not None
4243

    
4244
  def ExpandNames(self):
4245
    if self.lock_all:
4246
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4247
    else:
4248
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4249

    
4250
    if self.lock_instances:
4251
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4252

    
4253
  def DeclareLocks(self, level):
4254
    # If we have locked all instances, before waiting to lock nodes, release
4255
    # all the ones living on nodes unrelated to the current operation.
4256
    if level == locking.LEVEL_NODE and self.lock_instances:
4257
      instances_release = []
4258
      instances_keep = []
4259
      self.affected_instances = []
4260
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4261
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4262
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4263
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4264
          if i_mirrored and self.op.node_name in instance.all_nodes:
4265
            instances_keep.append(instance_name)
4266
            self.affected_instances.append(instance)
4267
          else:
4268
            instances_release.append(instance_name)
4269
        if instances_release:
4270
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4271
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4272

    
4273
  def BuildHooksEnv(self):
4274
    """Build hooks env.
4275

4276
    This runs on the master node.
4277

4278
    """
4279
    env = {
4280
      "OP_TARGET": self.op.node_name,
4281
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4282
      "OFFLINE": str(self.op.offline),
4283
      "DRAINED": str(self.op.drained),
4284
      "MASTER_CAPABLE": str(self.op.master_capable),
4285
      "VM_CAPABLE": str(self.op.vm_capable),
4286
      }
4287
    nl = [self.cfg.GetMasterNode(),
4288
          self.op.node_name]
4289
    return env, nl, nl
4290

    
4291
  def CheckPrereq(self):
4292
    """Check prerequisites.
4293

4294
    This only checks the instance list against the existing names.
4295

4296
    """
4297
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4298

    
4299
    if (self.op.master_candidate is not None or
4300
        self.op.drained is not None or
4301
        self.op.offline is not None):
4302
      # we can't change the master's node flags
4303
      if self.op.node_name == self.cfg.GetMasterNode():
4304
        raise errors.OpPrereqError("The master role can be changed"
4305
                                   " only via master-failover",
4306
                                   errors.ECODE_INVAL)
4307

    
4308
    if self.op.master_candidate and not node.master_capable:
4309
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4310
                                 " it a master candidate" % node.name,
4311
                                 errors.ECODE_STATE)
4312

    
4313
    if self.op.vm_capable == False:
4314
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4315
      if ipri or isec:
4316
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4317
                                   " the vm_capable flag" % node.name,
4318
                                   errors.ECODE_STATE)
4319

    
4320
    if node.master_candidate and self.might_demote and not self.lock_all:
4321
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
4322
      # check if after removing the current node, we're missing master
4323
      # candidates
4324
      (mc_remaining, mc_should, _) = \
4325
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4326
      if mc_remaining < mc_should:
4327
        raise errors.OpPrereqError("Not enough master candidates, please"
4328
                                   " pass auto_promote to allow promotion",
4329
                                   errors.ECODE_STATE)
4330

    
4331
    self.old_flags = old_flags = (node.master_candidate,
4332
                                  node.drained, node.offline)
4333
    assert old_flags in self._F2R, "Unhandled old flags %s" % str(old_flags)
4334
    self.old_role = old_role = self._F2R[old_flags]
4335

    
4336
    # Check for ineffective changes
4337
    for attr in self._FLAGS:
4338
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4339
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4340
        setattr(self.op, attr, None)
4341

    
4342
    # Past this point, any flag change to False means a transition
4343
    # away from the respective state, as only real changes are kept
4344

    
4345
    # TODO: We might query the real power state if it supports OOB
4346
    if _SupportsOob(self.cfg, node):
4347
      if self.op.offline is False and not (node.powered or
4348
                                           self.op.powered == True):
4349
        raise errors.OpPrereqError(("Please power on node %s first before you"
4350
                                    " can reset offline state") %
4351
                                   self.op.node_name)
4352
    elif self.op.powered is not None:
4353
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4354
                                  " which does not support out-of-band"
4355
                                  " handling") % self.op.node_name)
4356

    
4357
    # If we're being deofflined/drained, we'll MC ourself if needed
4358
    if (self.op.drained == False or self.op.offline == False or
4359
        (self.op.master_capable and not node.master_capable)):
4360
      if _DecideSelfPromotion(self):
4361
        self.op.master_candidate = True
4362
        self.LogInfo("Auto-promoting node to master candidate")
4363

    
4364
    # If we're no longer master capable, we'll demote ourselves from MC
4365
    if self.op.master_capable == False and node.master_candidate:
4366
      self.LogInfo("Demoting from master candidate")
4367
      self.op.master_candidate = False
4368

    
4369
    # Compute new role
4370
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4371
    if self.op.master_candidate:
4372
      new_role = self._ROLE_CANDIDATE
4373
    elif self.op.drained:
4374
      new_role = self._ROLE_DRAINED
4375
    elif self.op.offline:
4376
      new_role = self._ROLE_OFFLINE
4377
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4378
      # False is still in new flags, which means we're un-setting (the
4379
      # only) True flag
4380
      new_role = self._ROLE_REGULAR
4381
    else: # no new flags, nothing, keep old role
4382
      new_role = old_role
4383

    
4384
    self.new_role = new_role
4385

    
4386
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4387
      # Trying to transition out of offline status
4388
      result = self.rpc.call_version([node.name])[node.name]
4389
      if result.fail_msg:
4390
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4391
                                   " to report its version: %s" %
4392
                                   (node.name, result.fail_msg),
4393
                                   errors.ECODE_STATE)
4394
      else:
4395
        self.LogWarning("Transitioning node from offline to online state"
4396
                        " without using re-add. Please make sure the node"
4397
                        " is healthy!")
4398

    
4399
    if self.op.secondary_ip:
4400
      # Ok even without locking, because this can't be changed by any LU
4401
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4402
      master_singlehomed = master.secondary_ip == master.primary_ip
4403
      if master_singlehomed and self.op.secondary_ip:
4404
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4405
                                   " homed cluster", errors.ECODE_INVAL)
4406

    
4407
      if node.offline:
4408
        if self.affected_instances:
4409
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4410
                                     " node has instances (%s) configured"
4411
                                     " to use it" % self.affected_instances)
4412
      else:
4413
        # On online nodes, check that no instances are running, and that
4414
        # the node has the new ip and we can reach it.
4415
        for instance in self.affected_instances:
4416
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4417

    
4418
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4419
        if master.name != node.name:
4420
          # check reachability from master secondary ip to new secondary ip
4421
          if not netutils.TcpPing(self.op.secondary_ip,
4422
                                  constants.DEFAULT_NODED_PORT,
4423
                                  source=master.secondary_ip):
4424
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4425
                                       " based ping to node daemon port",
4426
                                       errors.ECODE_ENVIRON)
4427

    
4428
    if self.op.ndparams:
4429
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4430
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4431
      self.new_ndparams = new_ndparams
4432

    
4433
  def Exec(self, feedback_fn):
4434
    """Modifies a node.
4435

4436
    """
4437
    node = self.node
4438
    old_role = self.old_role
4439
    new_role = self.new_role
4440

    
4441
    result = []
4442

    
4443
    if self.op.ndparams:
4444
      node.ndparams = self.new_ndparams
4445

    
4446
    if self.op.powered is not None:
4447
      node.powered = self.op.powered
4448

    
4449
    for attr in ["master_capable", "vm_capable"]:
4450
      val = getattr(self.op, attr)
4451
      if val is not None:
4452
        setattr(node, attr, val)
4453
        result.append((attr, str(val)))
4454

    
4455
    if new_role != old_role:
4456
      # Tell the node to demote itself, if no longer MC and not offline
4457
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4458
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4459
        if msg:
4460
          self.LogWarning("Node failed to demote itself: %s", msg)
4461

    
4462
      new_flags = self._R2F[new_role]
4463
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4464
        if of != nf:
4465
          result.append((desc, str(nf)))
4466
      (node.master_candidate, node.drained, node.offline) = new_flags
4467

    
4468
      # we locked all nodes, we adjust the CP before updating this node
4469
      if self.lock_all:
4470
        _AdjustCandidatePool(self, [node.name])
4471

    
4472
    if self.op.secondary_ip:
4473
      node.secondary_ip = self.op.secondary_ip
4474
      result.append(("secondary_ip", self.op.secondary_ip))
4475

    
4476
    # this will trigger configuration file update, if needed
4477
    self.cfg.Update(node, feedback_fn)
4478

    
4479
    # this will trigger job queue propagation or cleanup if the mc
4480
    # flag changed
4481
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4482
      self.context.ReaddNode(node)
4483

    
4484
    return result
4485

    
4486

    
4487
class LUPowercycleNode(NoHooksLU):
4488
  """Powercycles a node.
4489

4490
  """
4491
  REQ_BGL = False
4492

    
4493
  def CheckArguments(self):
4494
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4495
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4496
      raise errors.OpPrereqError("The node is the master and the force"
4497
                                 " parameter was not set",
4498
                                 errors.ECODE_INVAL)
4499

    
4500
  def ExpandNames(self):
4501
    """Locking for PowercycleNode.
4502

4503
    This is a last-resort option and shouldn't block on other
4504
    jobs. Therefore, we grab no locks.
4505

4506
    """
4507
    self.needed_locks = {}
4508

    
4509
  def Exec(self, feedback_fn):
4510
    """Reboots a node.
4511

4512
    """
4513
    result = self.rpc.call_node_powercycle(self.op.node_name,
4514
                                           self.cfg.GetHypervisorType())
4515
    result.Raise("Failed to schedule the reboot")
4516
    return result.payload
4517

    
4518

    
4519
class LUQueryClusterInfo(NoHooksLU):
4520
  """Query cluster configuration.
4521

4522
  """
4523
  REQ_BGL = False
4524

    
4525
  def ExpandNames(self):
4526
    self.needed_locks = {}
4527

    
4528
  def Exec(self, feedback_fn):
4529
    """Return cluster config.
4530

4531
    """
4532
    cluster = self.cfg.GetClusterInfo()
4533
    os_hvp = {}
4534

    
4535
    # Filter just for enabled hypervisors
4536
    for os_name, hv_dict in cluster.os_hvp.items():
4537
      os_hvp[os_name] = {}
4538
      for hv_name, hv_params in hv_dict.items():
4539
        if hv_name in cluster.enabled_hypervisors:
4540
          os_hvp[os_name][hv_name] = hv_params
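
    # (Illustration only) the loop above is equivalent to filtering each
    # per-OS dictionary down to the enabled hypervisors, e.g.:
    #   enabled = frozenset(cluster.enabled_hypervisors)
    #   os_hvp = dict((name, dict((hv, p) for hv, p in hvd.items()
    #                             if hv in enabled))
    #                 for name, hvd in cluster.os_hvp.items())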
4541

    
4542
    # Convert ip_family to ip_version
4543
    primary_ip_version = constants.IP4_VERSION
4544
    if cluster.primary_ip_family == netutils.IP6Address.family:
4545
      primary_ip_version = constants.IP6_VERSION
4546

    
4547
    result = {
4548
      "software_version": constants.RELEASE_VERSION,
4549
      "protocol_version": constants.PROTOCOL_VERSION,
4550
      "config_version": constants.CONFIG_VERSION,
4551
      "os_api_version": max(constants.OS_API_VERSIONS),
4552
      "export_version": constants.EXPORT_VERSION,
4553
      "architecture": (platform.architecture()[0], platform.machine()),
4554
      "name": cluster.cluster_name,
4555
      "master": cluster.master_node,
4556
      "default_hypervisor": cluster.enabled_hypervisors[0],
4557
      "enabled_hypervisors": cluster.enabled_hypervisors,
4558
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4559
                        for hypervisor_name in cluster.enabled_hypervisors]),
4560
      "os_hvp": os_hvp,
4561
      "beparams": cluster.beparams,
4562
      "osparams": cluster.osparams,
4563
      "nicparams": cluster.nicparams,
4564
      "ndparams": cluster.ndparams,
4565
      "candidate_pool_size": cluster.candidate_pool_size,
4566
      "master_netdev": cluster.master_netdev,
4567
      "volume_group_name": cluster.volume_group_name,
4568
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4569
      "file_storage_dir": cluster.file_storage_dir,
4570
      "maintain_node_health": cluster.maintain_node_health,
4571
      "ctime": cluster.ctime,
4572
      "mtime": cluster.mtime,
4573
      "uuid": cluster.uuid,
4574
      "tags": list(cluster.GetTags()),
4575
      "uid_pool": cluster.uid_pool,
4576
      "default_iallocator": cluster.default_iallocator,
4577
      "reserved_lvs": cluster.reserved_lvs,
4578
      "primary_ip_version": primary_ip_version,
4579
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4580
      }
4581

    
4582
    return result
4583

    
4584

    
4585
class LUQueryConfigValues(NoHooksLU):
4586
  """Return configuration values.
4587

4588
  """
4589
  REQ_BGL = False
4590
  _FIELDS_DYNAMIC = utils.FieldSet()
4591
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4592
                                  "watcher_pause", "volume_group_name")
4593

    
4594
  def CheckArguments(self):
4595
    _CheckOutputFields(static=self._FIELDS_STATIC,
4596
                       dynamic=self._FIELDS_DYNAMIC,
4597
                       selected=self.op.output_fields)
4598

    
4599
  def ExpandNames(self):
4600
    self.needed_locks = {}
4601

    
4602
  def Exec(self, feedback_fn):
4603
    """Dump a representation of the cluster config to the standard output.
4604

4605
    """
4606
    values = []
4607
    for field in self.op.output_fields:
4608
      if field == "cluster_name":
4609
        entry = self.cfg.GetClusterName()
4610
      elif field == "master_node":
4611
        entry = self.cfg.GetMasterNode()
4612
      elif field == "drain_flag":
4613
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4614
      elif field == "watcher_pause":
4615
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4616
      elif field == "volume_group_name":
4617
        entry = self.cfg.GetVGName()
4618
      else:
4619
        raise errors.ParameterError(field)
4620
      values.append(entry)
4621
    return values
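
  # (Illustration only) the values are returned in the same order as the
  # requested fields, e.g. output_fields=["cluster_name", "drain_flag"]
  # might yield something like ["cluster.example.com", False].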
4622

    
4623

    
4624
class LUActivateInstanceDisks(NoHooksLU):
4625
  """Bring up an instance's disks.
4626

4627
  """
4628
  REQ_BGL = False
4629

    
4630
  def ExpandNames(self):
4631
    self._ExpandAndLockInstance()
4632
    self.needed_locks[locking.LEVEL_NODE] = []
4633
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4634

    
4635
  def DeclareLocks(self, level):
4636
    if level == locking.LEVEL_NODE:
4637
      self._LockInstancesNodes()
4638

    
4639
  def CheckPrereq(self):
4640
    """Check prerequisites.
4641

4642
    This checks that the instance is in the cluster.
4643

4644
    """
4645
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4646
    assert self.instance is not None, \
4647
      "Cannot retrieve locked instance %s" % self.op.instance_name
4648
    _CheckNodeOnline(self, self.instance.primary_node)
4649

    
4650
  def Exec(self, feedback_fn):
4651
    """Activate the disks.
4652

4653
    """
4654
    disks_ok, disks_info = \
4655
              _AssembleInstanceDisks(self, self.instance,
4656
                                     ignore_size=self.op.ignore_size)
4657
    if not disks_ok:
4658
      raise errors.OpExecError("Cannot activate block devices")
4659

    
4660
    return disks_info
4661

    
4662

    
4663
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4664
                           ignore_size=False):
4665
  """Prepare the block devices for an instance.
4666

4667
  This sets up the block devices on all nodes.
4668

4669
  @type lu: L{LogicalUnit}
4670
  @param lu: the logical unit on whose behalf we execute
4671
  @type instance: L{objects.Instance}
4672
  @param instance: the instance for whose disks we assemble
4673
  @type disks: list of L{objects.Disk} or None
4674
  @param disks: which disks to assemble (or all, if None)
4675
  @type ignore_secondaries: boolean
4676
  @param ignore_secondaries: if true, errors on secondary nodes
4677
      won't result in an error return from the function
4678
  @type ignore_size: boolean
4679
  @param ignore_size: if true, the current known size of the disk
4680
      will not be used during the disk activation, useful for cases
4681
      when the size is wrong
4682
  @return: a tuple (disks_ok, device_info); disks_ok is False if the
      operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name) triples with the
      mapping from node devices to instance devices
4685

4686
  """
4687
  device_info = []
4688
  disks_ok = True
4689
  iname = instance.name
4690
  disks = _ExpandCheckDisks(instance, disks)
4691

    
4692
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it
4695

    
4696
  # The proper fix would be to wait (with some limits) until the
4697
  # connection has been made and drbd transitions from WFConnection
4698
  # into any other network-connected state (Connected, SyncTarget,
4699
  # SyncSource, etc.)
4700

    
4701
  # 1st pass, assemble on all nodes in secondary mode
4702
  for inst_disk in disks:
4703
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4704
      if ignore_size:
4705
        node_disk = node_disk.Copy()
4706
        node_disk.UnsetSize()
4707
      lu.cfg.SetDiskID(node_disk, node)
4708
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4709
      msg = result.fail_msg
4710
      if msg:
4711
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4712
                           " (is_primary=False, pass=1): %s",
4713
                           inst_disk.iv_name, node, msg)
4714
        if not ignore_secondaries:
4715
          disks_ok = False
4716

    
4717
  # FIXME: race condition on drbd migration to primary
4718

    
4719
  # 2nd pass, do only the primary node
4720
  for inst_disk in disks:
4721
    dev_path = None
4722

    
4723
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4724
      if node != instance.primary_node:
4725
        continue
4726
      if ignore_size:
4727
        node_disk = node_disk.Copy()
4728
        node_disk.UnsetSize()
4729
      lu.cfg.SetDiskID(node_disk, node)
4730
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4731
      msg = result.fail_msg
4732
      if msg:
4733
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4734
                           " (is_primary=True, pass=2): %s",
4735
                           inst_disk.iv_name, node, msg)
4736
        disks_ok = False
4737
      else:
4738
        dev_path = result.payload
4739

    
4740
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4741

    
4742
  # leave the disks configured for the primary node
4743
  # this is a workaround that would be fixed better by
4744
  # improving the logical/physical id handling
4745
  for disk in disks:
4746
    lu.cfg.SetDiskID(disk, instance.primary_node)
4747

    
4748
  return disks_ok, device_info
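

# A minimal, self-contained sketch (not part of Ganeti) of the two-pass
# ordering used by _AssembleInstanceDisks above: every (node, disk) pair is
# first assembled in secondary (non-primary) mode, and only afterwards is
# the primary node assembled in primary mode.  The "assemble_fn" callable
# and this helper itself are hypothetical stand-ins for the real blockdev
# RPC machinery.
def _ExampleTwoPassAssembly(node_disk_pairs, primary_node, assemble_fn):
  """Return the assemble calls made, in order, as (node, is_primary) pairs.

  """
  calls = []
  # 1st pass: assemble on all nodes in secondary mode
  for node, disk in node_disk_pairs:
    assemble_fn(node, disk, False)
    calls.append((node, False))
  # 2nd pass: assemble only on the primary node, in primary mode
  for node, disk in node_disk_pairs:
    if node == primary_node:
      assemble_fn(node, disk, True)
      calls.append((node, True))
  return calls

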
def _StartInstanceDisks(lu, instance, force):
4752
  """Start the disks of an instance.
4753

4754
  """
4755
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4756
                                           ignore_secondaries=force)
4757
  if not disks_ok:
4758
    _ShutdownInstanceDisks(lu, instance)
4759
    if force is not None and not force:
4760
      lu.proc.LogWarning("", hint="If the message above refers to a"
4761
                         " secondary node,"
4762
                         " you can retry the operation using '--force'.")
4763
    raise errors.OpExecError("Disk consistency error")
4764

    
4765

    
4766
class LUDeactivateInstanceDisks(NoHooksLU):
4767
  """Shutdown an instance's disks.
4768

4769
  """
4770
  REQ_BGL = False
4771

    
4772
  def ExpandNames(self):
4773
    self._ExpandAndLockInstance()
4774
    self.needed_locks[locking.LEVEL_NODE] = []
4775
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4776

    
4777
  def DeclareLocks(self, level):
4778
    if level == locking.LEVEL_NODE:
4779
      self._LockInstancesNodes()
4780

    
4781
  def CheckPrereq(self):
4782
    """Check prerequisites.
4783

4784
    This checks that the instance is in the cluster.
4785

4786
    """
4787
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4788
    assert self.instance is not None, \
4789
      "Cannot retrieve locked instance %s" % self.op.instance_name
4790

    
4791
  def Exec(self, feedback_fn):
4792
    """Deactivate the disks
4793

4794
    """
4795
    instance = self.instance
4796
    _SafeShutdownInstanceDisks(self, instance)
4797

    
4798

    
4799
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4800
  """Shutdown block devices of an instance.
4801

4802
  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.
4804

4805
  """
4806
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4807
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4808

    
4809

    
4810
def _ExpandCheckDisks(instance, disks):
4811
  """Return the instance disks selected by the disks list
4812

4813
  @type disks: list of L{objects.Disk} or None
4814
  @param disks: selected disks
4815
  @rtype: list of L{objects.Disk}
4816
  @return: selected instance disks to act on
4817

4818
  """
4819
  if disks is None:
4820
    return instance.disks
4821
  else:
4822
    if not set(disks).issubset(instance.disks):
4823
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4824
                                   " target instance")
4825
    return disks
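

# Usage sketch for _ExpandCheckDisks (illustration only; "inst" stands for a
# hypothetical instance object):
#   _ExpandCheckDisks(inst, None) == inst.disks    # act on all disks
#   _ExpandCheckDisks(inst, inst.disks[:1])        # act on an explicit subset
#   # passing a disk that does not belong to "inst" raises ProgrammerError

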
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4829
  """Shutdown block devices of an instance.
4830

4831
  This does the shutdown on all nodes of the instance.
4832

4833
  If ignore_primary is true, errors on the primary node are ignored;
  otherwise they cause the function to return failure.
4835

4836
  """
4837
  all_result = True
4838
  disks = _ExpandCheckDisks(instance, disks)
4839

    
4840
  for disk in disks:
4841
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4842
      lu.cfg.SetDiskID(top_disk, node)
4843
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4844
      msg = result.fail_msg
4845
      if msg:
4846
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4847
                      disk.iv_name, node, msg)
4848
        if ((node == instance.primary_node and not ignore_primary) or
4849
            (node != instance.primary_node and not result.offline)):
4850
          all_result = False
4851
  return all_result
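

# Minimal sketch (not part of Ganeti) of the error-tolerance rule applied by
# _ShutdownInstanceDisks above when a blockdev shutdown RPC fails: the
# failure is tolerated on the primary node only if ignore_primary is set,
# and on a secondary node only if that node was reported offline.
def _ExampleShutdownFailureTolerated(node, primary_node, ignore_primary,
                                     node_offline):
  """Return True if a failed shutdown on "node" should not fail the call.

  """
  if node == primary_node:
    return ignore_primary
  return node_offline

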
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4855
  """Checks if a node has enough free memory.
4856

4857
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4861

4862
  @type lu: C{LogicalUnit}
4863
  @param lu: a logical unit from which we get configuration data
4864
  @type node: C{str}
4865
  @param node: the node to check
4866
  @type reason: C{str}
4867
  @param reason: string to use in the error message
4868
  @type requested: C{int}
4869
  @param requested: the amount of memory in MiB to check for
4870
  @type hypervisor_name: C{str}
4871
  @param hypervisor_name: the hypervisor to ask for memory stats
4872
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4873
      we cannot check the node
4874

4875
  """
4876
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4877
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4878
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4879
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4880
  if not isinstance(free_mem, int):
4881
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4882
                               " was '%s'" % (node, free_mem),
4883
                               errors.ECODE_ENVIRON)
4884
  if requested > free_mem:
4885
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4886
                               " needed %s MiB, available %s MiB" %
4887
                               (node, reason, requested, free_mem),
4888
                               errors.ECODE_NORES)
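

# Minimal sketch (not part of Ganeti) of the check performed by
# _CheckNodeFreeMemory above, using a plain "free_mem_by_node" dict
# (node name -> free memory in MiB) instead of the node_info RPC.
def _ExampleHasEnoughMemory(free_mem_by_node, node, requested):
  """Return True if "node" reports at least "requested" MiB of free memory.

  """
  free_mem = free_mem_by_node.get(node)
  if not isinstance(free_mem, int):
    # mirrors the "cannot check the node" error path above
    return False
  return requested <= free_mem

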
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4892
  """Checks if nodes have enough free disk space in the all VGs.
4893

4894
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4898

4899
  @type lu: C{LogicalUnit}
4900
  @param lu: a logical unit from which we get configuration data
4901
  @type nodenames: C{list}
4902
  @param nodenames: the list of node names to check
4903
  @type req_sizes: C{dict}
4904
  @param req_sizes: the hash of vg and corresponding amount of disk in
4905
      MiB to check for
4906
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4907
      or we cannot check the node
4908

4909
  """
4910
  for vg, req_size in req_sizes.items():
4911
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
4912

    
4913

    
4914
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4915
  """Checks if nodes have enough free disk space in the specified VG.
4916

4917
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4921

4922
  @type lu: C{LogicalUnit}
4923
  @param lu: a logical unit from which we get configuration data
4924
  @type nodenames: C{list}
4925
  @param nodenames: the list of node names to check
4926
  @type vg: C{str}
4927
  @param vg: the volume group to check
4928
  @type requested: C{int}
4929
  @param requested: the amount of disk in MiB to check for
4930
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4931
      or we cannot check the node
4932

4933
  """
4934
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
4935
  for node in nodenames:
4936
    info = nodeinfo[node]
4937
    info.Raise("Cannot get current information from node %s" % node,
4938
               prereq=True, ecode=errors.ECODE_ENVIRON)
4939
    vg_free = info.payload.get("vg_free", None)
4940
    if not isinstance(vg_free, int):
4941
      raise errors.OpPrereqError("Can't compute free disk space on node"
4942
                                 " %s for vg %s, result was '%s'" %
4943
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
4944
    if requested > vg_free:
4945
      raise errors.OpPrereqError("Not enough disk space on target node %s"
4946
                                 " vg %s: required %d MiB, available %d MiB" %
4947
                                 (node, vg, requested, vg_free),
4948
                                 errors.ECODE_NORES)
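

# Minimal sketch (not part of Ganeti) of the per-VG free-disk verification
# done by _CheckNodesFreeDiskPerVG/_CheckNodesFreeDiskOnVG above, using
# plain dicts instead of RPC results: "vg_free_by_node" maps a node name to
# a {vg: free MiB} dict and "req_sizes" maps a vg name to the required MiB.
def _ExampleNodesHaveEnoughDisk(vg_free_by_node, nodenames, req_sizes):
  """Return True if every node has the requested space in every VG.

  """
  for vg, requested in req_sizes.items():
    for node in nodenames:
      vg_free = vg_free_by_node.get(node, {}).get(vg)
      if not isinstance(vg_free, int) or requested > vg_free:
        return False
  return True

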
class LUStartupInstance(LogicalUnit):
4952
  """Starts an instance.
4953

4954
  """
4955
  HPATH = "instance-start"
4956
  HTYPE = constants.HTYPE_INSTANCE
4957
  REQ_BGL = False
4958

    
4959
  def CheckArguments(self):
4960
    # extra beparams
4961
    if self.op.beparams:
4962
      # fill the beparams dict
4963
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4964

    
4965
  def ExpandNames(self):
4966
    self._ExpandAndLockInstance()
4967

    
4968
  def BuildHooksEnv(self):
4969
    """Build hooks env.
4970

4971
    This runs on master, primary and secondary nodes of the instance.
4972

4973
    """
4974
    env = {
4975
      "FORCE": self.op.force,
4976
      }
4977
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4978
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4979
    return env, nl, nl
4980

    
4981
  def CheckPrereq(self):
4982
    """Check prerequisites.
4983

4984
    This checks that the instance is in the cluster.
4985

4986
    """
4987
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4988
    assert self.instance is not None, \
4989
      "Cannot retrieve locked instance %s" % self.op.instance_name
4990

    
4991
    # extra hvparams
4992
    if self.op.hvparams:
4993
      # check hypervisor parameter syntax (locally)
4994
      cluster = self.cfg.GetClusterInfo()
4995
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4996
      filled_hvp = cluster.FillHV(instance)
4997
      filled_hvp.update(self.op.hvparams)
4998
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4999
      hv_type.CheckParameterSyntax(filled_hvp)
5000
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
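
      # (Illustration only) the merge order is: hypervisor defaults from
      # FillHV() first, then the one-off parameters passed to this opcode
      # on top, i.e. a key in self.op.hvparams overrides the same key
      # coming from the cluster/OS/instance level.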
5001

    
5002
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5003

    
5004
    if self.primary_offline and self.op.ignore_offline_nodes:
5005
      self.proc.LogWarning("Ignoring offline primary node")
5006

    
5007
      if self.op.hvparams or self.op.beparams:
5008
        self.proc.LogWarning("Overridden parameters are ignored")
5009
    else:
5010
      _CheckNodeOnline(self, instance.primary_node)
5011

    
5012
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5013

    
5014
      # check bridges existence
5015
      _CheckInstanceBridgesExist(self, instance)
5016

    
5017
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5018
                                                instance.name,
5019
                                                instance.hypervisor)
5020
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5021
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5022
      if not remote_info.payload: # not running already
5023
        _CheckNodeFreeMemory(self, instance.primary_node,
5024
                             "starting instance %s" % instance.name,
5025
                             bep[constants.BE_MEMORY], instance.hypervisor)
5026

    
5027
  def Exec(self, feedback_fn):
5028
    """Start the instance.
5029

5030
    """
5031
    instance = self.instance
5032
    force = self.op.force
5033

    
5034
    self.cfg.MarkInstanceUp(instance.name)
5035

    
5036
    if self.primary_offline:
5037
      assert self.op.ignore_offline_nodes
5038
      self.proc.LogInfo("Primary node offline, marked instance as started")
5039
    else:
5040
      node_current = instance.primary_node
5041

    
5042
      _StartInstanceDisks(self, instance, force)
5043

    
5044
      result = self.rpc.call_instance_start(node_current, instance,
5045
                                            self.op.hvparams, self.op.beparams)
5046
      msg = result.fail_msg
5047
      if msg:
5048
        _ShutdownInstanceDisks(self, instance)
5049
        raise errors.OpExecError("Could not start instance: %s" % msg)
5050

    
5051

    
5052
class LURebootInstance(LogicalUnit):
5053
  """Reboot an instance.
5054

5055
  """
5056
  HPATH = "instance-reboot"
5057
  HTYPE = constants.HTYPE_INSTANCE
5058
  REQ_BGL = False
5059

    
5060
  def ExpandNames(self):
5061
    self._ExpandAndLockInstance()
5062

    
5063
  def BuildHooksEnv(self):
5064
    """Build hooks env.
5065

5066
    This runs on master, primary and secondary nodes of the instance.
5067

5068
    """
5069
    env = {
5070
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5071
      "REBOOT_TYPE": self.op.reboot_type,
5072
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5073
      }
5074
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5075
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5076
    return env, nl, nl
5077

    
5078
  def CheckPrereq(self):
5079
    """Check prerequisites.
5080

5081
    This checks that the instance is in the cluster.
5082

5083
    """
5084
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5085
    assert self.instance is not None, \
5086
      "Cannot retrieve locked instance %s" % self.op.instance_name
5087

    
5088
    _CheckNodeOnline(self, instance.primary_node)
5089

    
5090
    # check bridges existence
5091
    _CheckInstanceBridgesExist(self, instance)
5092

    
5093
  def Exec(self, feedback_fn):
5094
    """Reboot the instance.
5095

5096
    """
5097
    instance = self.instance
5098
    ignore_secondaries = self.op.ignore_secondaries
5099
    reboot_type = self.op.reboot_type
5100

    
5101
    node_current = instance.primary_node
5102

    
5103
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5104
                       constants.INSTANCE_REBOOT_HARD]:
5105
      for disk in instance.disks:
5106
        self.cfg.SetDiskID(disk, node_current)
5107
      result = self.rpc.call_instance_reboot(node_current, instance,
5108
                                             reboot_type,
5109
                                             self.op.shutdown_timeout)
5110
      result.Raise("Could not reboot instance")
5111
    else:
5112
      result = self.rpc.call_instance_shutdown(node_current, instance,
5113
                                               self.op.shutdown_timeout)
5114
      result.Raise("Could not shutdown instance for full reboot")
5115
      _ShutdownInstanceDisks(self, instance)
5116
      _StartInstanceDisks(self, instance, ignore_secondaries)
5117
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5118
      msg = result.fail_msg
5119
      if msg:
5120
        _ShutdownInstanceDisks(self, instance)
5121
        raise errors.OpExecError("Could not start instance for"
5122
                                 " full reboot: %s" % msg)
5123

    
5124
    self.cfg.MarkInstanceUp(instance.name)
5125

    
5126

    
5127
class LUShutdownInstance(LogicalUnit):
5128
  """Shutdown an instance.
5129

5130
  """
5131
  HPATH = "instance-stop"
5132
  HTYPE = constants.HTYPE_INSTANCE
5133
  REQ_BGL = False
5134

    
5135
  def ExpandNames(self):
5136
    self._ExpandAndLockInstance()
5137

    
5138
  def BuildHooksEnv(self):
5139
    """Build hooks env.
5140

5141
    This runs on master, primary and secondary nodes of the instance.
5142

5143
    """
5144
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5145
    env["TIMEOUT"] = self.op.timeout
5146
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5147
    return env, nl, nl
5148

    
5149
  def CheckPrereq(self):
5150
    """Check prerequisites.
5151

5152
    This checks that the instance is in the cluster.
5153

5154
    """
5155
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5156
    assert self.instance is not None, \
5157
      "Cannot retrieve locked instance %s" % self.op.instance_name
5158

    
5159
    self.primary_offline = \
5160
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5161

    
5162
    if self.primary_offline and self.op.ignore_offline_nodes:
5163
      self.proc.LogWarning("Ignoring offline primary node")
5164
    else:
5165
      _CheckNodeOnline(self, self.instance.primary_node)
5166

    
5167
  def Exec(self, feedback_fn):
5168
    """Shutdown the instance.
5169

5170
    """
5171
    instance = self.instance
5172
    node_current = instance.primary_node
5173
    timeout = self.op.timeout
5174

    
5175
    self.cfg.MarkInstanceDown(instance.name)
5176

    
5177
    if self.primary_offline:
5178
      assert self.op.ignore_offline_nodes
5179
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5180
    else:
5181
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5182
      msg = result.fail_msg
5183
      if msg:
5184
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5185

    
5186
      _ShutdownInstanceDisks(self, instance)
5187

    
5188

    
5189
class LUReinstallInstance(LogicalUnit):
5190
  """Reinstall an instance.
5191

5192
  """
5193
  HPATH = "instance-reinstall"
5194
  HTYPE = constants.HTYPE_INSTANCE
5195
  REQ_BGL = False
5196

    
5197
  def ExpandNames(self):
5198
    self._ExpandAndLockInstance()
5199

    
5200
  def BuildHooksEnv(self):
5201
    """Build hooks env.
5202

5203
    This runs on master, primary and secondary nodes of the instance.
5204

5205
    """
5206
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5207
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5208
    return env, nl, nl
5209

    
5210
  def CheckPrereq(self):
5211
    """Check prerequisites.
5212

5213
    This checks that the instance is in the cluster and is not running.
5214

5215
    """
5216
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5217
    assert instance is not None, \
5218
      "Cannot retrieve locked instance %s" % self.op.instance_name
5219
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5220
                     " offline, cannot reinstall")
5221
    for node in instance.secondary_nodes:
5222
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5223
                       " cannot reinstall")
5224

    
5225
    if instance.disk_template == constants.DT_DISKLESS:
5226
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5227
                                 self.op.instance_name,
5228
                                 errors.ECODE_INVAL)
5229
    _CheckInstanceDown(self, instance, "cannot reinstall")
5230

    
5231
    if self.op.os_type is not None:
5232
      # OS verification
5233
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5234
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5235
      instance_os = self.op.os_type
5236
    else:
5237
      instance_os = instance.os
5238

    
5239
    nodelist = list(instance.all_nodes)
5240

    
5241
    if self.op.osparams:
5242
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5243
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5244
      self.os_inst = i_osdict # the new dict (without defaults)
5245
    else:
5246
      self.os_inst = None
5247

    
5248
    self.instance = instance
5249

    
5250
  def Exec(self, feedback_fn):
5251
    """Reinstall the instance.
5252

5253
    """
5254
    inst = self.instance
5255

    
5256
    if self.op.os_type is not None:
5257
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5258
      inst.os = self.op.os_type
5259
      # Write to configuration
5260
      self.cfg.Update(inst, feedback_fn)
5261

    
5262
    _StartInstanceDisks(self, inst, None)
5263
    try:
5264
      feedback_fn("Running the instance OS create scripts...")
5265
      # FIXME: pass debug option from opcode to backend
5266
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5267
                                             self.op.debug_level,
5268
                                             osparams=self.os_inst)
5269
      result.Raise("Could not install OS for instance %s on node %s" %
5270
                   (inst.name, inst.primary_node))
5271
    finally:
5272
      _ShutdownInstanceDisks(self, inst)
5273

    
5274

    
5275
class LURecreateInstanceDisks(LogicalUnit):
5276
  """Recreate an instance's missing disks.
5277

5278
  """
5279
  HPATH = "instance-recreate-disks"
5280
  HTYPE = constants.HTYPE_INSTANCE
5281
  REQ_BGL = False
5282

    
5283
  def ExpandNames(self):
5284
    self._ExpandAndLockInstance()
5285

    
5286
  def BuildHooksEnv(self):
5287
    """Build hooks env.
5288

5289
    This runs on master, primary and secondary nodes of the instance.
5290

5291
    """
5292
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5293
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5294
    return env, nl, nl
5295

    
5296
  def CheckPrereq(self):
5297
    """Check prerequisites.
5298

5299
    This checks that the instance is in the cluster and is not running.
5300

5301
    """
5302
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5303
    assert instance is not None, \
5304
      "Cannot retrieve locked instance %s" % self.op.instance_name
5305
    _CheckNodeOnline(self, instance.primary_node)
5306

    
5307
    if instance.disk_template == constants.DT_DISKLESS:
5308
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5309
                                 self.op.instance_name, errors.ECODE_INVAL)
5310
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5311

    
5312
    if not self.op.disks:
5313
      self.op.disks = range(len(instance.disks))
5314
    else:
5315
      for idx in self.op.disks:
5316
        if idx >= len(instance.disks):
5317
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5318
                                     errors.ECODE_INVAL)
5319

    
5320
    self.instance = instance
5321

    
5322
  def Exec(self, feedback_fn):
5323
    """Recreate the disks.
5324

5325
    """
5326
    to_skip = []
5327
    for idx, _ in enumerate(self.instance.disks):
5328
      if idx not in self.op.disks: # disk idx has not been passed in
5329
        to_skip.append(idx)
5330
        continue
5331

    
5332
    _CreateDisks(self, self.instance, to_skip=to_skip)
5333

    
5334

    
5335
class LURenameInstance(LogicalUnit):
5336
  """Rename an instance.
5337

5338
  """
5339
  HPATH = "instance-rename"
5340
  HTYPE = constants.HTYPE_INSTANCE
5341

    
5342
  def CheckArguments(self):
5343
    """Check arguments.
5344

5345
    """
5346
    if self.op.ip_check and not self.op.name_check:
5347
      # TODO: make the ip check more flexible and not depend on the name check
5348
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5349
                                 errors.ECODE_INVAL)
5350

    
5351
  def BuildHooksEnv(self):
5352
    """Build hooks env.
5353

5354
    This runs on master, primary and secondary nodes of the instance.
5355

5356
    """
5357
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5358
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5359
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5360
    return env, nl, nl
5361

    
5362
  def CheckPrereq(self):
5363
    """Check prerequisites.
5364

5365
    This checks that the instance is in the cluster and is not running.
5366

5367
    """
5368
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5369
                                                self.op.instance_name)
5370
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5371
    assert instance is not None
5372
    _CheckNodeOnline(self, instance.primary_node)
5373
    _CheckInstanceDown(self, instance, "cannot rename")
5374
    self.instance = instance
5375

    
5376
    new_name = self.op.new_name
5377
    if self.op.name_check:
5378
      hostname = netutils.GetHostname(name=new_name)
5379
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5380
                   hostname.name)
5381
      new_name = self.op.new_name = hostname.name
5382
      if (self.op.ip_check and
5383
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5384
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5385
                                   (hostname.ip, new_name),
5386
                                   errors.ECODE_NOTUNIQUE)
5387

    
5388
    instance_list = self.cfg.GetInstanceList()
5389
    if new_name in instance_list and new_name != instance.name:
5390
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5391
                                 new_name, errors.ECODE_EXISTS)
5392

    
5393
  def Exec(self, feedback_fn):
5394
    """Rename the instance.
5395

5396
    """
5397
    inst = self.instance
5398
    old_name = inst.name
5399

    
5400
    rename_file_storage = False
5401
    if (inst.disk_template == constants.DT_FILE and
5402
        self.op.new_name != inst.name):
5403
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5404
      rename_file_storage = True
5405

    
5406
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5407
    # Change the instance lock. This is definitely safe while we hold the BGL
5408
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5409
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5410

    
5411
    # re-read the instance from the configuration after rename
5412
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5413

    
5414
    if rename_file_storage:
5415
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5416
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5417
                                                     old_file_storage_dir,
5418
                                                     new_file_storage_dir)
5419
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5420
                   " (but the instance has been renamed in Ganeti)" %
5421
                   (inst.primary_node, old_file_storage_dir,
5422
                    new_file_storage_dir))
5423

    
5424
    _StartInstanceDisks(self, inst, None)
5425
    try:
5426
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5427
                                                 old_name, self.op.debug_level)
5428
      msg = result.fail_msg
5429
      if msg:
5430
        msg = ("Could not run OS rename script for instance %s on node %s"
5431
               " (but the instance has been renamed in Ganeti): %s" %
5432
               (inst.name, inst.primary_node, msg))
5433
        self.proc.LogWarning(msg)
5434
    finally:
5435
      _ShutdownInstanceDisks(self, inst)
5436

    
5437
    return inst.name
5438

    
5439

    
5440
class LURemoveInstance(LogicalUnit):
5441
  """Remove an instance.
5442

5443
  """
5444
  HPATH = "instance-remove"
5445
  HTYPE = constants.HTYPE_INSTANCE
5446
  REQ_BGL = False
5447

    
5448
  def ExpandNames(self):
5449
    self._ExpandAndLockInstance()
5450
    self.needed_locks[locking.LEVEL_NODE] = []
5451
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5452

    
5453
  def DeclareLocks(self, level):
5454
    if level == locking.LEVEL_NODE:
5455
      self._LockInstancesNodes()
5456

    
5457
  def BuildHooksEnv(self):
5458
    """Build hooks env.
5459

5460
    This runs on master, primary and secondary nodes of the instance.
5461

5462
    """
5463
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5464
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5465
    nl = [self.cfg.GetMasterNode()]
5466
    nl_post = list(self.instance.all_nodes) + nl
5467
    return env, nl, nl_post
5468

    
5469
  def CheckPrereq(self):
5470
    """Check prerequisites.
5471

5472
    This checks that the instance is in the cluster.
5473

5474
    """
5475
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5476
    assert self.instance is not None, \
5477
      "Cannot retrieve locked instance %s" % self.op.instance_name
5478

    
5479
  def Exec(self, feedback_fn):
5480
    """Remove the instance.
5481

5482
    """
5483
    instance = self.instance
5484
    logging.info("Shutting down instance %s on node %s",
5485
                 instance.name, instance.primary_node)
5486

    
5487
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5488
                                             self.op.shutdown_timeout)
5489
    msg = result.fail_msg
5490
    if msg:
5491
      if self.op.ignore_failures:
5492
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5493
      else:
5494
        raise errors.OpExecError("Could not shutdown instance %s on"
5495
                                 " node %s: %s" %
5496
                                 (instance.name, instance.primary_node, msg))
5497

    
5498
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5499

    
5500

    
5501
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5502
  """Utility function to remove an instance.
5503

5504
  """
5505
  logging.info("Removing block devices for instance %s", instance.name)
5506

    
5507
  if not _RemoveDisks(lu, instance):
5508
    if not ignore_failures:
5509
      raise errors.OpExecError("Can't remove instance's disks")
5510
    feedback_fn("Warning: can't remove instance's disks")
5511

    
5512
  logging.info("Removing instance %s out of cluster config", instance.name)
5513

    
5514
  lu.cfg.RemoveInstance(instance.name)
5515

    
5516
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5517
    "Instance lock removal conflict"
5518

    
5519
  # Remove lock for the instance
5520
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5521

    
5522

    
5523
class LUQueryInstances(NoHooksLU):
5524
  """Logical unit for querying instances.
5525

5526
  """
5527
  # pylint: disable-msg=W0142
5528
  REQ_BGL = False
5529

    
5530
  def CheckArguments(self):
5531
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5532
                             self.op.use_locking)
5533

    
5534
  def ExpandNames(self):
5535
    self.iq.ExpandNames(self)
5536

    
5537
  def DeclareLocks(self, level):
5538
    self.iq.DeclareLocks(self, level)
5539

    
5540
  def Exec(self, feedback_fn):
5541
    return self.iq.OldStyleQuery(self)
5542

    
5543

    
5544
class LUFailoverInstance(LogicalUnit):
5545
  """Failover an instance.
5546

5547
  """
5548
  HPATH = "instance-failover"
5549
  HTYPE = constants.HTYPE_INSTANCE
5550
  REQ_BGL = False
5551

    
5552
  def ExpandNames(self):
5553
    self._ExpandAndLockInstance()
5554
    self.needed_locks[locking.LEVEL_NODE] = []
5555
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5556

    
5557
  def DeclareLocks(self, level):
5558
    if level == locking.LEVEL_NODE:
5559
      self._LockInstancesNodes()
5560

    
5561
  def BuildHooksEnv(self):
5562
    """Build hooks env.
5563

5564
    This runs on master, primary and secondary nodes of the instance.
5565

5566
    """
5567
    instance = self.instance
5568
    source_node = instance.primary_node
5569
    target_node = instance.secondary_nodes[0]
5570
    env = {
5571
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5572
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5573
      "OLD_PRIMARY": source_node,
5574
      "OLD_SECONDARY": target_node,
5575
      "NEW_PRIMARY": target_node,
5576
      "NEW_SECONDARY": source_node,
5577
      }
5578
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5579
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5580
    nl_post = list(nl)
5581
    nl_post.append(source_node)
5582
    return env, nl, nl_post
5583

    
5584
  def CheckPrereq(self):
5585
    """Check prerequisites.
5586

5587
    This checks that the instance is in the cluster.
5588

5589
    """
5590
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5591
    assert self.instance is not None, \
5592
      "Cannot retrieve locked instance %s" % self.op.instance_name
5593

    
5594
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5595
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5596
      raise errors.OpPrereqError("Instance's disk layout is not"
5597
                                 " network mirrored, cannot failover.",
5598
                                 errors.ECODE_STATE)
5599

    
5600
    secondary_nodes = instance.secondary_nodes
5601
    if not secondary_nodes:
5602
      raise errors.ProgrammerError("no secondary node but using "
5603
                                   "a mirrored disk template")
5604

    
5605
    target_node = secondary_nodes[0]
5606
    _CheckNodeOnline(self, target_node)
5607
    _CheckNodeNotDrained(self, target_node)
5608
    if instance.admin_up:
5609
      # check memory requirements on the secondary node
5610
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5611
                           instance.name, bep[constants.BE_MEMORY],
5612
                           instance.hypervisor)
5613
    else:
5614
      self.LogInfo("Not checking memory on the secondary node as"
5615
                   " instance will not be started")
5616

    
5617
    # check bridge existance
5618
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5619

    
5620
  def Exec(self, feedback_fn):
5621
    """Failover an instance.
5622

5623
    The failover is done by shutting it down on its present node and
5624
    starting it on the secondary.
5625

5626
    """
5627
    instance = self.instance
5628
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5629

    
5630
    source_node = instance.primary_node
5631
    target_node = instance.secondary_nodes[0]
5632

    
5633
    if instance.admin_up:
5634
      feedback_fn("* checking disk consistency between source and target")
5635
      for dev in instance.disks:
5636
        # for drbd, these are drbd over lvm
5637
        if not _CheckDiskConsistency(self, dev, target_node, False):
5638
          if not self.op.ignore_consistency:
5639
            raise errors.OpExecError("Disk %s is degraded on target node,"
5640
                                     " aborting failover." % dev.iv_name)
5641
    else:
5642
      feedback_fn("* not checking disk consistency as instance is not running")
5643

    
5644
    feedback_fn("* shutting down instance on source node")
5645
    logging.info("Shutting down instance %s on node %s",
5646
                 instance.name, source_node)
5647

    
5648
    result = self.rpc.call_instance_shutdown(source_node, instance,
5649
                                             self.op.shutdown_timeout)
5650
    msg = result.fail_msg
5651
    if msg:
5652
      if self.op.ignore_consistency or primary_node.offline:
5653
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5654
                             " Proceeding anyway. Please make sure node"
5655
                             " %s is down. Error details: %s",
5656
                             instance.name, source_node, source_node, msg)
5657
      else:
5658
        raise errors.OpExecError("Could not shutdown instance %s on"
5659
                                 " node %s: %s" %
5660
                                 (instance.name, source_node, msg))
5661

    
5662
    feedback_fn("* deactivating the instance's disks on source node")
5663
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5664
      raise errors.OpExecError("Can't shut down the instance's disks.")
5665

    
5666
    instance.primary_node = target_node
5667
    # distribute new instance config to the other nodes
5668
    self.cfg.Update(instance, feedback_fn)
5669

    
5670
    # Only start the instance if it's marked as up
5671
    if instance.admin_up:
5672
      feedback_fn("* activating the instance's disks on target node")
5673
      logging.info("Starting instance %s on node %s",
5674
                   instance.name, target_node)
5675

    
5676
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5677
                                           ignore_secondaries=True)
5678
      if not disks_ok:
5679
        _ShutdownInstanceDisks(self, instance)
5680
        raise errors.OpExecError("Can't activate the instance's disks")
5681

    
5682
      feedback_fn("* starting the instance on the target node")
5683
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5684
      msg = result.fail_msg
5685
      if msg:
5686
        _ShutdownInstanceDisks(self, instance)
5687
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5688
                                 (instance.name, target_node, msg))
5689

    
5690

    
5691
class LUMigrateInstance(LogicalUnit):
5692
  """Migrate an instance.
5693

5694
  This is migration without shutting down, compared to the failover,
5695
  which is done with shutdown.
5696

5697
  """
5698
  HPATH = "instance-migrate"
5699
  HTYPE = constants.HTYPE_INSTANCE
5700
  REQ_BGL = False
5701

    
5702
  def ExpandNames(self):
5703
    self._ExpandAndLockInstance()
5704

    
5705
    self.needed_locks[locking.LEVEL_NODE] = []
5706
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5707

    
5708
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5709
                                       self.op.cleanup)
5710
    self.tasklets = [self._migrater]
5711

    
5712
  def DeclareLocks(self, level):
5713
    if level == locking.LEVEL_NODE:
5714
      self._LockInstancesNodes()
5715

    
5716
  def BuildHooksEnv(self):
5717
    """Build hooks env.
5718

5719
    This runs on master, primary and secondary nodes of the instance.
5720

5721
    """
5722
    instance = self._migrater.instance
5723
    source_node = instance.primary_node
5724
    target_node = instance.secondary_nodes[0]
5725
    env = _BuildInstanceHookEnvByObject(self, instance)
5726
    env["MIGRATE_LIVE"] = self._migrater.live
5727
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5728
    env.update({
5729
        "OLD_PRIMARY": source_node,
5730
        "OLD_SECONDARY": target_node,
5731
        "NEW_PRIMARY": target_node,
5732
        "NEW_SECONDARY": source_node,
5733
        })
5734
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5735
    nl_post = list(nl)
5736
    nl_post.append(source_node)
5737
    return env, nl, nl_post
5738

    
5739

    
5740
class LUMoveInstance(LogicalUnit):
5741
  """Move an instance by data-copying.
5742

5743
  """
5744
  HPATH = "instance-move"
5745
  HTYPE = constants.HTYPE_INSTANCE
5746
  REQ_BGL = False
5747

    
5748
  def ExpandNames(self):
5749
    self._ExpandAndLockInstance()
5750
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5751
    self.op.target_node = target_node
5752
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5753
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5754

    
5755
  def DeclareLocks(self, level):
5756
    if level == locking.LEVEL_NODE:
5757
      self._LockInstancesNodes(primary_only=True)
5758

    
5759
  def BuildHooksEnv(self):
5760
    """Build hooks env.
5761

5762
    This runs on master, primary and secondary nodes of the instance.
5763

5764
    """
5765
    env = {
5766
      "TARGET_NODE": self.op.target_node,
5767
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5768
      }
5769
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5770
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5771
                                       self.op.target_node]
5772
    return env, nl, nl
5773

    
5774
  def CheckPrereq(self):
5775
    """Check prerequisites.
5776

5777
    This checks that the instance is in the cluster.
5778

5779
    """
5780
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5781
    assert self.instance is not None, \
5782
      "Cannot retrieve locked instance %s" % self.op.instance_name
5783

    
5784
    node = self.cfg.GetNodeInfo(self.op.target_node)
5785
    assert node is not None, \
5786
      "Cannot retrieve locked node %s" % self.op.target_node
5787

    
5788
    self.target_node = target_node = node.name
5789

    
5790
    if target_node == instance.primary_node:
5791
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5792
                                 (instance.name, target_node),
5793
                                 errors.ECODE_STATE)
5794

    
5795
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5796

    
5797
    for idx, dsk in enumerate(instance.disks):
5798
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5799
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5800
                                   " cannot copy" % idx, errors.ECODE_STATE)
5801

    
5802
    _CheckNodeOnline(self, target_node)
5803
    _CheckNodeNotDrained(self, target_node)
5804
    _CheckNodeVmCapable(self, target_node)
5805

    
5806
    if instance.admin_up:
5807
      # check memory requirements on the target node
5808
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5809
                           instance.name, bep[constants.BE_MEMORY],
5810
                           instance.hypervisor)
5811
    else:
5812
      self.LogInfo("Not checking memory on the secondary node as"
5813
                   " instance will not be started")
5814

    
5815
    # check bridge existence
5816
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5817

    
5818
  def Exec(self, feedback_fn):
5819
    """Move an instance.
5820

5821
    The move is done by shutting it down on its present node, copying
5822
    the data over (slow) and starting it on the new node.
5823

5824
    """
5825
    instance = self.instance
5826

    
5827
    source_node = instance.primary_node
5828
    target_node = self.target_node
5829

    
5830
    self.LogInfo("Shutting down instance %s on source node %s",
5831
                 instance.name, source_node)
5832

    
5833
    result = self.rpc.call_instance_shutdown(source_node, instance,
5834
                                             self.op.shutdown_timeout)
5835
    msg = result.fail_msg
5836
    if msg:
5837
      if self.op.ignore_consistency:
5838
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5839
                             " Proceeding anyway. Please make sure node"
5840
                             " %s is down. Error details: %s",
5841
                             instance.name, source_node, source_node, msg)
5842
      else:
5843
        raise errors.OpExecError("Could not shutdown instance %s on"
5844
                                 " node %s: %s" %
5845
                                 (instance.name, source_node, msg))
5846

    
5847
    # create the target disks
5848
    try:
5849
      _CreateDisks(self, instance, target_node=target_node)
5850
    except errors.OpExecError:
5851
      self.LogWarning("Device creation failed, reverting...")
5852
      try:
5853
        _RemoveDisks(self, instance, target_node=target_node)
5854
      finally:
5855
        self.cfg.ReleaseDRBDMinors(instance.name)
5856
        raise
5857

    
5858
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5859

    
5860
    errs = []
5861
    # activate, get path, copy the data over
5862
    for idx, disk in enumerate(instance.disks):
5863
      self.LogInfo("Copying data for disk %d", idx)
5864
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5865
                                               instance.name, True)
5866
      if result.fail_msg:
5867
        self.LogWarning("Can't assemble newly created disk %d: %s",
5868
                        idx, result.fail_msg)
5869
        errs.append(result.fail_msg)
5870
        break
5871
      dev_path = result.payload
5872
      result = self.rpc.call_blockdev_export(source_node, disk,
5873
                                             target_node, dev_path,
5874
                                             cluster_name)
5875
      if result.fail_msg:
5876
        self.LogWarning("Can't copy data over for disk %d: %s",
5877
                        idx, result.fail_msg)
5878
        errs.append(result.fail_msg)
5879
        break
5880

    
5881
    if errs:
5882
      self.LogWarning("Some disks failed to copy, aborting")
5883
      try:
5884
        _RemoveDisks(self, instance, target_node=target_node)
5885
      finally:
5886
        self.cfg.ReleaseDRBDMinors(instance.name)
5887
        raise errors.OpExecError("Errors during disk copy: %s" %
5888
                                 (",".join(errs),))
5889

    
5890
    instance.primary_node = target_node
5891
    self.cfg.Update(instance, feedback_fn)
5892

    
5893
    self.LogInfo("Removing the disks on the original node")
5894
    _RemoveDisks(self, instance, target_node=source_node)
5895

    
5896
    # Only start the instance if it's marked as up
5897
    if instance.admin_up:
5898
      self.LogInfo("Starting instance %s on node %s",
5899
                   instance.name, target_node)
5900

    
5901
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5902
                                           ignore_secondaries=True)
5903
      if not disks_ok:
5904
        _ShutdownInstanceDisks(self, instance)
5905
        raise errors.OpExecError("Can't activate the instance's disks")
5906

    
5907
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5908
      msg = result.fail_msg
5909
      if msg:
5910
        _ShutdownInstanceDisks(self, instance)
5911
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5912
                                 (instance.name, target_node, msg))
5913

    
5914

    
5915
class LUMigrateNode(LogicalUnit):
5916
  """Migrate all instances from a node.
5917

5918
  """
5919
  HPATH = "node-migrate"
5920
  HTYPE = constants.HTYPE_NODE
5921
  REQ_BGL = False
5922

    
5923
  def ExpandNames(self):
5924
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5925

    
5926
    self.needed_locks = {
5927
      locking.LEVEL_NODE: [self.op.node_name],
5928
      }
5929

    
5930
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5931

    
5932
    # Create tasklets for migrating instances for all instances on this node
5933
    names = []
5934
    tasklets = []
5935

    
5936
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5937
      logging.debug("Migrating instance %s", inst.name)
5938
      names.append(inst.name)
5939

    
5940
      tasklets.append(TLMigrateInstance(self, inst.name, False))
5941

    
5942
    self.tasklets = tasklets
5943

    
5944
    # Declare instance locks
5945
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5946

    
5947
  def DeclareLocks(self, level):
5948
    if level == locking.LEVEL_NODE:
5949
      self._LockInstancesNodes()
5950

    
5951
  def BuildHooksEnv(self):
5952
    """Build hooks env.
5953

5954
    This runs on the master, the primary and all the secondaries.
5955

5956
    """
5957
    env = {
5958
      "NODE_NAME": self.op.node_name,
5959
      }
5960

    
5961
    nl = [self.cfg.GetMasterNode()]
5962

    
5963
    return (env, nl, nl)
5964

    
5965

    
5966
class TLMigrateInstance(Tasklet):
5967
  """Tasklet class for instance migration.
5968

5969
  @type live: boolean
5970
  @ivar live: whether the migration will be done live or non-live;
5971
      this variable is initialized only after CheckPrereq has run
5972

5973
  """
5974
  def __init__(self, lu, instance_name, cleanup):
5975
    """Initializes this class.
5976

5977
    """
5978
    Tasklet.__init__(self, lu)
5979

    
5980
    # Parameters
5981
    self.instance_name = instance_name
5982
    self.cleanup = cleanup
5983
    self.live = False # will be overridden later
5984

    
5985
  def CheckPrereq(self):
5986
    """Check prerequisites.
5987

5988
    This checks that the instance is in the cluster.
5989

5990
    """
5991
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5992
    instance = self.cfg.GetInstanceInfo(instance_name)
5993
    assert instance is not None
5994

    
5995
    if instance.disk_template != constants.DT_DRBD8:
5996
      raise errors.OpPrereqError("Instance's disk layout is not"
5997
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5998

    
5999
    secondary_nodes = instance.secondary_nodes
6000
    if not secondary_nodes:
6001
      raise errors.ConfigurationError("No secondary node but using"
6002
                                      " drbd8 disk template")
6003

    
6004
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6005

    
6006
    target_node = secondary_nodes[0]
6007
    # check memory requirements on the secondary node
6008
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6009
                         instance.name, i_be[constants.BE_MEMORY],
6010
                         instance.hypervisor)
6011

    
6012
    # check bridge existence
6013
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6014

    
6015
    if not self.cleanup:
6016
      _CheckNodeNotDrained(self.lu, target_node)
6017
      result = self.rpc.call_instance_migratable(instance.primary_node,
6018
                                                 instance)
6019
      result.Raise("Can't migrate, please use failover",
6020
                   prereq=True, ecode=errors.ECODE_STATE)
6021

    
6022
    self.instance = instance
6023

    
6024
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6025
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6026
                                 " parameters are accepted",
6027
                                 errors.ECODE_INVAL)
6028
    if self.lu.op.live is not None:
6029
      if self.lu.op.live:
6030
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6031
      else:
6032
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6033
      # reset the 'live' parameter to None so that repeated
6034
      # invocations of CheckPrereq do not raise an exception
6035
      self.lu.op.live = None
6036
    elif self.lu.op.mode is None:
6037
      # read the default value from the hypervisor
6038
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6039
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6040

    
6041
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6042
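    # Summary of how the 'live'/'mode' parameters resolve (illustrative
    # recap of the logic above):
    #   live=True,  mode=None -> constants.HT_MIGRATION_LIVE
    #   live=False, mode=None -> constants.HT_MIGRATION_NONLIVE
    #   live=None,  mode=<m>  -> <m> is used unchanged
    #   live=None,  mode=None -> the hypervisor's HV_MIGRATION_MODE default
    #   both given            -> OpPrereqError (mutually exclusive)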

    
6043
  def _WaitUntilSync(self):
6044
    """Poll with custom rpc for disk sync.
6045

6046
    This uses our own step-based rpc call.
6047

6048
    """
6049
    self.feedback_fn("* wait until resync is done")
6050
    all_done = False
6051
    while not all_done:
6052
      all_done = True
6053
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6054
                                            self.nodes_ip,
6055
                                            self.instance.disks)
6056
      min_percent = 100
6057
      for node, nres in result.items():
6058
        nres.Raise("Cannot resync disks on node %s" % node)
6059
        node_done, node_percent = nres.payload
6060
        all_done = all_done and node_done
6061
        if node_percent is not None:
6062
          min_percent = min(min_percent, node_percent)
6063
      if not all_done:
6064
        if min_percent < 100:
6065
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6066
        time.sleep(2)
6067

    
6068
  def _EnsureSecondary(self, node):
6069
    """Demote a node to secondary.
6070

6071
    """
6072
    self.feedback_fn("* switching node %s to secondary mode" % node)
6073

    
6074
    for dev in self.instance.disks:
6075
      self.cfg.SetDiskID(dev, node)
6076

    
6077
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6078
                                          self.instance.disks)
6079
    result.Raise("Cannot change disk to secondary on node %s" % node)
6080

    
6081
  def _GoStandalone(self):
6082
    """Disconnect from the network.
6083

6084
    """
6085
    self.feedback_fn("* changing into standalone mode")
6086
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6087
                                               self.instance.disks)
6088
    for node, nres in result.items():
6089
      nres.Raise("Cannot disconnect disks node %s" % node)
6090

    
6091
  def _GoReconnect(self, multimaster):
6092
    """Reconnect to the network.
6093

6094
    """
6095
    if multimaster:
6096
      msg = "dual-master"
6097
    else:
6098
      msg = "single-master"
6099
    self.feedback_fn("* changing disks into %s mode" % msg)
6100
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6101
                                           self.instance.disks,
6102
                                           self.instance.name, multimaster)
6103
    for node, nres in result.items():
6104
      nres.Raise("Cannot change disks config on node %s" % node)
6105

    
6106
  def _ExecCleanup(self):
6107
    """Try to cleanup after a failed migration.
6108

6109
    The cleanup is done by:
6110
      - check that the instance is running only on one node
6111
        (and update the config if needed)
6112
      - change disks on its secondary node to secondary
6113
      - wait until disks are fully synchronized
6114
      - disconnect from the network
6115
      - change disks into single-master mode
6116
      - wait again until disks are fully synchronized
6117

6118
    """
6119
    instance = self.instance
6120
    target_node = self.target_node
6121
    source_node = self.source_node
6122

    
6123
    # check running on only one node
6124
    self.feedback_fn("* checking where the instance actually runs"
6125
                     " (if this hangs, the hypervisor might be in"
6126
                     " a bad state)")
6127
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6128
    for node, result in ins_l.items():
6129
      result.Raise("Can't contact node %s" % node)
6130

    
6131
    runningon_source = instance.name in ins_l[source_node].payload
6132
    runningon_target = instance.name in ins_l[target_node].payload
6133

    
6134
    if runningon_source and runningon_target:
6135
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6136
                               " or the hypervisor is confused. You will have"
6137
                               " to ensure manually that it runs only on one"
6138
                               " and restart this operation.")
6139

    
6140
    if not (runningon_source or runningon_target):
6141
      raise errors.OpExecError("Instance does not seem to be running at all."
6142
                               " In this case, it's safer to repair by"
6143
                               " running 'gnt-instance stop' to ensure disk"
6144
                               " shutdown, and then restarting it.")
6145

    
6146
    if runningon_target:
6147
      # the migration has actually succeeded, we need to update the config
6148
      self.feedback_fn("* instance running on secondary node (%s),"
6149
                       " updating config" % target_node)
6150
      instance.primary_node = target_node
6151
      self.cfg.Update(instance, self.feedback_fn)
6152
      demoted_node = source_node
6153
    else:
6154
      self.feedback_fn("* instance confirmed to be running on its"
6155
                       " primary node (%s)" % source_node)
6156
      demoted_node = target_node
6157

    
6158
    self._EnsureSecondary(demoted_node)
6159
    try:
6160
      self._WaitUntilSync()
6161
    except errors.OpExecError:
6162
      # we ignore here errors, since if the device is standalone, it
6163
      # won't be able to sync
6164
      pass
6165
    self._GoStandalone()
6166
    self._GoReconnect(False)
6167
    self._WaitUntilSync()
6168

    
6169
    self.feedback_fn("* done")
6170

    
6171
  def _RevertDiskStatus(self):
6172
    """Try to revert the disk status after a failed migration.
6173

6174
    """
6175
    target_node = self.target_node
6176
    try:
6177
      self._EnsureSecondary(target_node)
6178
      self._GoStandalone()
6179
      self._GoReconnect(False)
6180
      self._WaitUntilSync()
6181
    except errors.OpExecError, err:
6182
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6183
                         " drives: error '%s'\n"
6184
                         "Please look and recover the instance status" %
6185
                         str(err))
6186

    
6187
  def _AbortMigration(self):
6188
    """Call the hypervisor code to abort a started migration.
6189

6190
    """
6191
    instance = self.instance
6192
    target_node = self.target_node
6193
    migration_info = self.migration_info
6194

    
6195
    abort_result = self.rpc.call_finalize_migration(target_node,
6196
                                                    instance,
6197
                                                    migration_info,
6198
                                                    False)
6199
    abort_msg = abort_result.fail_msg
6200
    if abort_msg:
6201
      logging.error("Aborting migration failed on target node %s: %s",
6202
                    target_node, abort_msg)
6203
      # Don't raise an exception here, as we still have to try to revert the
6204
      # disk status, even if this step failed.
6205

    
6206
  def _ExecMigration(self):
6207
    """Migrate an instance.
6208

6209
    The migrate is done by:
6210
      - change the disks into dual-master mode
6211
      - wait until disks are fully synchronized again
6212
      - migrate the instance
6213
      - change disks on the new secondary node (the old primary) to secondary
6214
      - wait until disks are fully synchronized
6215
      - change disks into single-master mode
6216

6217
    """
6218
    instance = self.instance
6219
    target_node = self.target_node
6220
    source_node = self.source_node
6221

    
6222
    self.feedback_fn("* checking disk consistency between source and target")
6223
    for dev in instance.disks:
6224
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6225
        raise errors.OpExecError("Disk %s is degraded or not fully"
6226
                                 " synchronized on target node,"
6227
                                 " aborting migrate." % dev.iv_name)
6228

    
6229
    # First get the migration information from the remote node
6230
    result = self.rpc.call_migration_info(source_node, instance)
6231
    msg = result.fail_msg
6232
    if msg:
6233
      log_err = ("Failed fetching source migration information from %s: %s" %
6234
                 (source_node, msg))
6235
      logging.error(log_err)
6236
      raise errors.OpExecError(log_err)
6237

    
6238
    self.migration_info = migration_info = result.payload
6239

    
6240
    # Then switch the disks to master/master mode
6241
    self._EnsureSecondary(target_node)
6242
    self._GoStandalone()
6243
    self._GoReconnect(True)
6244
    self._WaitUntilSync()
6245

    
6246
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6247
    result = self.rpc.call_accept_instance(target_node,
6248
                                           instance,
6249
                                           migration_info,
6250
                                           self.nodes_ip[target_node])
6251

    
6252
    msg = result.fail_msg
6253
    if msg:
6254
      logging.error("Instance pre-migration failed, trying to revert"
6255
                    " disk status: %s", msg)
6256
      self.feedback_fn("Pre-migration failed, aborting")
6257
      self._AbortMigration()
6258
      self._RevertDiskStatus()
6259
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6260
                               (instance.name, msg))
6261

    
6262
    self.feedback_fn("* migrating instance to %s" % target_node)
6263
6264
    result = self.rpc.call_instance_migrate(source_node, instance,
6265
                                            self.nodes_ip[target_node],
6266
                                            self.live)
6267
    msg = result.fail_msg
6268
    if msg:
6269
      logging.error("Instance migration failed, trying to revert"
6270
                    " disk status: %s", msg)
6271
      self.feedback_fn("Migration failed, aborting")
6272
      self._AbortMigration()
6273
      self._RevertDiskStatus()
6274
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6275
                               (instance.name, msg))
6276
6277

    
6278
    instance.primary_node = target_node
6279
    # distribute new instance config to the other nodes
6280
    self.cfg.Update(instance, self.feedback_fn)
6281

    
6282
    result = self.rpc.call_finalize_migration(target_node,
6283
                                              instance,
6284
                                              migration_info,
6285
                                              True)
6286
    msg = result.fail_msg
6287
    if msg:
6288
      logging.error("Instance migration succeeded, but finalization failed:"
6289
                    " %s", msg)
6290
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6291
                               msg)
6292

    
6293
    self._EnsureSecondary(source_node)
6294
    self._WaitUntilSync()
6295
    self._GoStandalone()
6296
    self._GoReconnect(False)
6297
    self._WaitUntilSync()
6298

    
6299
    self.feedback_fn("* done")
6300

    
6301
  def Exec(self, feedback_fn):
6302
    """Perform the migration.
6303

6304
    """
6305
    feedback_fn("Migrating instance %s" % self.instance.name)
6306

    
6307
    self.feedback_fn = feedback_fn
6308

    
6309
    self.source_node = self.instance.primary_node
6310
    self.target_node = self.instance.secondary_nodes[0]
6311
    self.all_nodes = [self.source_node, self.target_node]
6312
    self.nodes_ip = {
6313
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6314
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6315
      }
6316

    
6317
    if self.cleanup:
6318
      return self._ExecCleanup()
6319
    else:
6320
      return self._ExecMigration()
6321

    
6322

    
6323
def _CreateBlockDev(lu, node, instance, device, force_create,
6324
                    info, force_open):
6325
  """Create a tree of block devices on a given node.
6326

6327
  If this device type has to be created on secondaries, create it and
6328
  all its children.
6329

6330
  If not, just recurse to children keeping the same 'force' value.
6331

6332
  @param lu: the lu on whose behalf we execute
6333
  @param node: the node on which to create the device
6334
  @type instance: L{objects.Instance}
6335
  @param instance: the instance which owns the device
6336
  @type device: L{objects.Disk}
6337
  @param device: the device to create
6338
  @type force_create: boolean
6339
  @param force_create: whether to force creation of this device; this
6340
      will be changed to True whenever we find a device which has
6341
      CreateOnSecondary() attribute
6342
  @param info: the extra 'metadata' we should attach to the device
6343
      (this will be represented as a LVM tag)
6344
  @type force_open: boolean
6345
  @param force_open: this parameter will be passed to the
6346
      L{backend.BlockdevCreate} function where it specifies
6347
      whether we run on primary or not, and it affects both
6348
      the child assembly and the device own Open() execution
6349

6350
  """
6351
  if device.CreateOnSecondary():
6352
    force_create = True
6353

    
6354
  if device.children:
6355
    for child in device.children:
6356
      _CreateBlockDev(lu, node, instance, child, force_create,
6357
                      info, force_open)
6358

    
6359
  if not force_create:
6360
    return
6361

    
6362
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6363

    
6364

    
6365
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6366
  """Create a single block device on a given node.
6367

6368
  This will not recurse over children of the device, so they must be
6369
  created in advance.
6370

6371
  @param lu: the lu on whose behalf we execute
6372
  @param node: the node on which to create the device
6373
  @type instance: L{objects.Instance}
6374
  @param instance: the instance which owns the device
6375
  @type device: L{objects.Disk}
6376
  @param device: the device to create
6377
  @param info: the extra 'metadata' we should attach to the device
6378
      (this will be represented as a LVM tag)
6379
  @type force_open: boolean
6380
  @param force_open: this parameter will be passed to the
6381
      L{backend.BlockdevCreate} function where it specifies
6382
      whether we run on primary or not, and it affects both
6383
      the child assembly and the device own Open() execution
6384

6385
  """
6386
  lu.cfg.SetDiskID(device, node)
6387
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6388
                                       instance.name, force_open, info)
6389
  result.Raise("Can't create block device %s on"
6390
               " node %s for instance %s" % (device, node, instance.name))
6391
  if device.physical_id is None:
6392
    device.physical_id = result.payload
6393

    
6394

    
6395
def _GenerateUniqueNames(lu, exts):
6396
  """Generate a suitable LV name.
6397

6398
  This will generate unique logical volume names, one for each given suffix.
6399

6400
  """
6401
  results = []
6402
  for val in exts:
6403
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6404
    results.append("%s%s" % (new_id, val))
6405
  return results
6406

    
6407

    
6408
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6409
                         p_minor, s_minor):
6410
  """Generate a drbd8 device complete with its children.
6411

6412
  """
6413
  port = lu.cfg.AllocatePort()
6414
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6415
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6416
                          logical_id=(vgname, names[0]))
6417
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6418
                          logical_id=(vgname, names[1]))
6419
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6420
                          logical_id=(primary, secondary, port,
6421
                                      p_minor, s_minor,
6422
                                      shared_secret),
6423
                          children=[dev_data, dev_meta],
6424
                          iv_name=iv_name)
6425
  return drbd_dev
6426
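# Illustrative sketch of the tree built above (names and sizes made up): a
# 1024 MiB DRBD8 disk comes out roughly as
#
#   Disk(LD_DRBD8, size=1024,
#        logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#        children=[Disk(LD_LV, size=1024, logical_id=(vg, "<uuid>_data")),
#                  Disk(LD_LV, size=128,  logical_id=(vg, "<uuid>_meta"))])
#
# i.e. the data LV holds the payload and a fixed 128 MiB LV holds the DRBD
# metadata.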

    
6427

    
6428
def _GenerateDiskTemplate(lu, template_name,
6429
                          instance_name, primary_node,
6430
                          secondary_nodes, disk_info,
6431
                          file_storage_dir, file_driver,
6432
                          base_index, feedback_fn):
6433
  """Generate the entire disk layout for a given template type.
6434

6435
  """
6436
  #TODO: compute space requirements
6437

    
6438
  vgname = lu.cfg.GetVGName()
6439
  disk_count = len(disk_info)
6440
  disks = []
6441
  if template_name == constants.DT_DISKLESS:
6442
    pass
6443
  elif template_name == constants.DT_PLAIN:
6444
    if len(secondary_nodes) != 0:
6445
      raise errors.ProgrammerError("Wrong template configuration")
6446

    
6447
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6448
                                      for i in range(disk_count)])
6449
    for idx, disk in enumerate(disk_info):
6450
      disk_index = idx + base_index
6451
      vg = disk.get("vg", vgname)
6452
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6453
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6454
                              logical_id=(vg, names[idx]),
6455
                              iv_name="disk/%d" % disk_index,
6456
                              mode=disk["mode"])
6457
      disks.append(disk_dev)
6458
  elif template_name == constants.DT_DRBD8:
6459
    if len(secondary_nodes) != 1:
6460
      raise errors.ProgrammerError("Wrong template configuration")
6461
    remote_node = secondary_nodes[0]
6462
    minors = lu.cfg.AllocateDRBDMinor(
6463
      [primary_node, remote_node] * len(disk_info), instance_name)
6464

    
6465
    names = []
6466
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6467
                                               for i in range(disk_count)]):
6468
      names.append(lv_prefix + "_data")
6469
      names.append(lv_prefix + "_meta")
6470
    for idx, disk in enumerate(disk_info):
6471
      disk_index = idx + base_index
6472
      vg = disk.get("vg", vgname)
6473
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6474
                                      disk["size"], vg, names[idx*2:idx*2+2],
6475
                                      "disk/%d" % disk_index,
6476
                                      minors[idx*2], minors[idx*2+1])
6477
      disk_dev.mode = disk["mode"]
6478
      disks.append(disk_dev)
6479
  elif template_name == constants.DT_FILE:
6480
    if len(secondary_nodes) != 0:
6481
      raise errors.ProgrammerError("Wrong template configuration")
6482

    
6483
    opcodes.RequireFileStorage()
6484

    
6485
    for idx, disk in enumerate(disk_info):
6486
      disk_index = idx + base_index
6487
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6488
                              iv_name="disk/%d" % disk_index,
6489
                              logical_id=(file_driver,
6490
                                          "%s/disk%d" % (file_storage_dir,
6491
                                                         disk_index)),
6492
                              mode=disk["mode"])
6493
      disks.append(disk_dev)
6494
  else:
6495
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6496
  return disks
6497

    
6498

    
6499
def _GetInstanceInfoText(instance):
6500
  """Compute that text that should be added to the disk's metadata.
6501

6502
  """
6503
  return "originstname+%s" % instance.name
6504

    
6505

    
6506
def _CalcEta(time_taken, written, total_size):
6507
  """Calculates the ETA based on size written and total size.
6508

6509
  @param time_taken: The time taken so far
6510
  @param written: amount written so far
6511
  @param total_size: The total size of data to be written
6512
  @return: The remaining time in seconds
6513

6514
  """
6515
  avg_time = time_taken / float(written)
6516
  return (total_size - written) * avg_time
6517
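# Example (illustrative only, not exercised anywhere): if 256 MiB out of
# 1024 MiB were written in 30 seconds, the average is 30.0 / 256 seconds
# per MiB and the remaining 768 MiB are estimated at:
#
#   >>> _CalcEta(30.0, 256, 1024)
#   90.0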

    
6518

    
6519
def _WipeDisks(lu, instance):
6520
  """Wipes instance disks.
6521

6522
  @type lu: L{LogicalUnit}
6523
  @param lu: the logical unit on whose behalf we execute
6524
  @type instance: L{objects.Instance}
6525
  @param instance: the instance whose disks we should wipe
6526
  @return: the success of the wipe
6527

6528
  """
6529
  node = instance.primary_node
6530
  logging.info("Pause sync of instance %s disks", instance.name)
6531
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6532

    
6533
  for idx, success in enumerate(result.payload):
6534
    if not success:
6535
      logging.warn("pause-sync of instance %s for disks %d failed",
6536
                   instance.name, idx)
6537

    
6538
  try:
6539
    for idx, device in enumerate(instance.disks):
6540
      lu.LogInfo("* Wiping disk %d", idx)
6541
      logging.info("Wiping disk %d for instance %s", idx, instance.name)
6542

    
6543
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6544
      # MAX_WIPE_CHUNK at max
6545
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6546
                            constants.MIN_WIPE_CHUNK_PERCENT)
6547
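      # For example (illustrative values only): with MAX_WIPE_CHUNK at
      # 1024 MiB and MIN_WIPE_CHUNK_PERCENT at 10, a 4096 MiB disk would be
      # wiped in min(1024, 4096 / 100.0 * 10) = 409.6 MiB chunks, while a
      # 100 GiB disk would be capped at 1024 MiB per chunk.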

    
6548
      offset = 0
6549
      size = device.size
6550
      last_output = 0
6551
      start_time = time.time()
6552

    
6553
      while offset < size:
6554
        wipe_size = min(wipe_chunk_size, size - offset)
6555
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6556
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
6557
                     (idx, offset, wipe_size))
6558
        now = time.time()
6559
        offset += wipe_size
6560
        if now - last_output >= 60:
6561
          eta = _CalcEta(now - start_time, offset, size)
6562
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
6563
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
6564
          last_output = now
6565
  finally:
6566
    logging.info("Resume sync of instance %s disks", instance.name)
6567

    
6568
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6569

    
6570
    for idx, success in enumerate(result.payload):
6571
      if not success:
6572
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6573
                      " look at the status and troubleshoot the issue.", idx)
6574
        logging.warn("resume-sync of instance %s for disks %d failed",
6575
                     instance.name, idx)
6576

    
6577

    
6578
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6579
  """Create all disks for an instance.
6580

6581
  This abstracts away some work from AddInstance.
6582

6583
  @type lu: L{LogicalUnit}
6584
  @param lu: the logical unit on whose behalf we execute
6585
  @type instance: L{objects.Instance}
6586
  @param instance: the instance whose disks we should create
6587
  @type to_skip: list
6588
  @param to_skip: list of indices to skip
6589
  @type target_node: string
6590
  @param target_node: if passed, overrides the target node for creation
6591
  @rtype: boolean
6592
  @return: the success of the creation
6593

6594
  """
6595
  info = _GetInstanceInfoText(instance)
6596
  if target_node is None:
6597
    pnode = instance.primary_node
6598
    all_nodes = instance.all_nodes
6599
  else:
6600
    pnode = target_node
6601
    all_nodes = [pnode]
6602

    
6603
  if instance.disk_template == constants.DT_FILE:
6604
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6605
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6606

    
6607
    result.Raise("Failed to create directory '%s' on"
6608
                 " node %s" % (file_storage_dir, pnode))
6609

    
6610
  # Note: this needs to be kept in sync with adding of disks in
6611
  # LUSetInstanceParams
6612
  for idx, device in enumerate(instance.disks):
6613
    if to_skip and idx in to_skip:
6614
      continue
6615
    logging.info("Creating volume %s for instance %s",
6616
                 device.iv_name, instance.name)
6617
    #HARDCODE
6618
    for node in all_nodes:
6619
      f_create = node == pnode
6620
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6621

    
6622

    
6623
def _RemoveDisks(lu, instance, target_node=None):
6624
  """Remove all disks for an instance.
6625

6626
  This abstracts away some work from `AddInstance()` and
6627
  `RemoveInstance()`. Note that in case some of the devices couldn't
6628
  be removed, the removal will continue with the other ones (compare
6629
  with `_CreateDisks()`).
6630

6631
  @type lu: L{LogicalUnit}
6632
  @param lu: the logical unit on whose behalf we execute
6633
  @type instance: L{objects.Instance}
6634
  @param instance: the instance whose disks we should remove
6635
  @type target_node: string
6636
  @param target_node: used to override the node on which to remove the disks
6637
  @rtype: boolean
6638
  @return: the success of the removal
6639

6640
  """
6641
  logging.info("Removing block devices for instance %s", instance.name)
6642

    
6643
  all_result = True
6644
  for device in instance.disks:
6645
    if target_node:
6646
      edata = [(target_node, device)]
6647
    else:
6648
      edata = device.ComputeNodeTree(instance.primary_node)
6649
    for node, disk in edata:
6650
      lu.cfg.SetDiskID(disk, node)
6651
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6652
      if msg:
6653
        lu.LogWarning("Could not remove block device %s on node %s,"
6654
                      " continuing anyway: %s", device.iv_name, node, msg)
6655
        all_result = False
6656

    
6657
  if instance.disk_template == constants.DT_FILE:
6658
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6659
    if target_node:
6660
      tgt = target_node
6661
    else:
6662
      tgt = instance.primary_node
6663
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6664
    if result.fail_msg:
6665
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6666
                    file_storage_dir, tgt, result.fail_msg)
6667
      all_result = False
6668

    
6669
  return all_result
6670

    
6671

    
6672
def _ComputeDiskSizePerVG(disk_template, disks):
6673
  """Compute disk size requirements in the volume group
6674

6675
  """
6676
  def _compute(disks, payload):
6677
    """Universal algorithm
6678

6679
    """
6680
    vgs = {}
6681
    for disk in disks:
6682
      vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6683

    
6684
    return vgs
6685

    
6686
  # Required free disk space as a function of disk and swap space
6687
  req_size_dict = {
6688
    constants.DT_DISKLESS: {},
6689
    constants.DT_PLAIN: _compute(disks, 0),
6690
    # 128 MB are added for drbd metadata for each disk
6691
    constants.DT_DRBD8: _compute(disks, 128),
6692
    constants.DT_FILE: {},
6693
  }
6694

    
6695
  if disk_template not in req_size_dict:
6696
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6697
                                 " is unknown" %  disk_template)
6698

    
6699
  return req_size_dict[disk_template]
6700
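# Illustrative example (values made up): two DRBD disks in the same volume
# group are accumulated together, each carrying 128 MiB of metadata
# overhead:
#
#   >>> _ComputeDiskSizePerVG(constants.DT_DRBD8,
#   ...                       [{"vg": "xenvg", "size": 1024},
#   ...                        {"vg": "xenvg", "size": 2048}])
#   {'xenvg': 3328}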

    
6701

    
6702
def _ComputeDiskSize(disk_template, disks):
6703
  """Compute disk size requirements in the volume group
6704

6705
  """
6706
  # Required free disk space as a function of disk and swap space
6707
  req_size_dict = {
6708
    constants.DT_DISKLESS: None,
6709
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6710
    # 128 MB are added for drbd metadata for each disk
6711
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6712
    constants.DT_FILE: None,
6713
  }
6714

    
6715
  if disk_template not in req_size_dict:
6716
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6717
                                 " is unknown" %  disk_template)
6718

    
6719
  return req_size_dict[disk_template]
6720
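# Illustrative example: for two disks of 1024 and 2048 MiB the flat variant
# returns a single total, again adding 128 MiB of DRBD metadata per disk:
#
#   >>> _ComputeDiskSize(constants.DT_DRBD8,
#   ...                  [{"size": 1024}, {"size": 2048}])
#   3328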

    
6721

    
6722
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6723
  """Hypervisor parameter validation.
6724

6725
  This function abstracts the hypervisor parameter validation to be
6726
  used in both instance create and instance modify.
6727

6728
  @type lu: L{LogicalUnit}
6729
  @param lu: the logical unit for which we check
6730
  @type nodenames: list
6731
  @param nodenames: the list of nodes on which we should check
6732
  @type hvname: string
6733
  @param hvname: the name of the hypervisor we should use
6734
  @type hvparams: dict
6735
  @param hvparams: the parameters which we need to check
6736
  @raise errors.OpPrereqError: if the parameters are not valid
6737

6738
  """
6739
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6740
                                                  hvname,
6741
                                                  hvparams)
6742
  for node in nodenames:
6743
    info = hvinfo[node]
6744
    if info.offline:
6745
      continue
6746
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6747

    
6748

    
6749
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6750
  """OS parameters validation.
6751

6752
  @type lu: L{LogicalUnit}
6753
  @param lu: the logical unit for which we check
6754
  @type required: boolean
6755
  @param required: whether the validation should fail if the OS is not
6756
      found
6757
  @type nodenames: list
6758
  @param nodenames: the list of nodes on which we should check
6759
  @type osname: string
6760
  @param osname: the name of the OS we should use
6761
  @type osparams: dict
6762
  @param osparams: the parameters which we need to check
6763
  @raise errors.OpPrereqError: if the parameters are not valid
6764

6765
  """
6766
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6767
                                   [constants.OS_VALIDATE_PARAMETERS],
6768
                                   osparams)
6769
  for node, nres in result.items():
6770
    # we don't check for offline cases since this should be run only
6771
    # against the master node and/or an instance's nodes
6772
    nres.Raise("OS Parameters validation failed on node %s" % node)
6773
    if not nres.payload:
6774
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6775
                 osname, node)
6776

    
6777

    
6778
class LUCreateInstance(LogicalUnit):
6779
  """Create an instance.
6780

6781
  """
6782
  HPATH = "instance-add"
6783
  HTYPE = constants.HTYPE_INSTANCE
6784
  REQ_BGL = False
6785

    
6786
  def CheckArguments(self):
6787
    """Check arguments.
6788

6789
    """
6790
    # do not require name_check to ease forward/backward compatibility
6791
    # for tools
6792
    if self.op.no_install and self.op.start:
6793
      self.LogInfo("No-installation mode selected, disabling startup")
6794
      self.op.start = False
6795
    # validate/normalize the instance name
6796
    self.op.instance_name = \
6797
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6798

    
6799
    if self.op.ip_check and not self.op.name_check:
6800
      # TODO: make the ip check more flexible and not depend on the name check
6801
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6802
                                 errors.ECODE_INVAL)
6803

    
6804
    # check nics' parameter names
6805
    for nic in self.op.nics:
6806
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6807

    
6808
    # check disks. parameter names and consistent adopt/no-adopt strategy
6809
    has_adopt = has_no_adopt = False
6810
    for disk in self.op.disks:
6811
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6812
      if "adopt" in disk:
6813
        has_adopt = True
6814
      else:
6815
        has_no_adopt = True
6816
    if has_adopt and has_no_adopt:
6817
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6818
                                 errors.ECODE_INVAL)
6819
    if has_adopt:
6820
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6821
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6822
                                   " '%s' disk template" %
6823
                                   self.op.disk_template,
6824
                                   errors.ECODE_INVAL)
6825
      if self.op.iallocator is not None:
6826
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6827
                                   " iallocator script", errors.ECODE_INVAL)
6828
      if self.op.mode == constants.INSTANCE_IMPORT:
6829
        raise errors.OpPrereqError("Disk adoption not allowed for"
6830
                                   " instance import", errors.ECODE_INVAL)
6831

    
6832
    self.adopt_disks = has_adopt
6833
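    # For example (illustrative disk specs): [{"size": 1024, "adopt": "lv0"},
    # {"size": 2048, "adopt": "lv1"}] is a valid all-adopt request, while
    # mixing [{"size": 1024, "adopt": "lv0"}, {"size": 2048}] is rejected
    # above because adoption must be all-or-nothing.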

    
6834
    # instance name verification
6835
    if self.op.name_check:
6836
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6837
      self.op.instance_name = self.hostname1.name
6838
      # used in CheckPrereq for ip ping check
6839
      self.check_ip = self.hostname1.ip
6840
    else:
6841
      self.check_ip = None
6842

    
6843
    # file storage checks
6844
    if (self.op.file_driver and
6845
        not self.op.file_driver in constants.FILE_DRIVER):
6846
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6847
                                 self.op.file_driver, errors.ECODE_INVAL)
6848

    
6849
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6850
      raise errors.OpPrereqError("File storage dir path must not be absolute",
6851
                                 errors.ECODE_INVAL)
6852

    
6853
    ### Node/iallocator related checks
6854
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6855

    
6856
    if self.op.pnode is not None:
6857
      if self.op.disk_template in constants.DTS_NET_MIRROR:
6858
        if self.op.snode is None:
6859
          raise errors.OpPrereqError("The networked disk templates need"
6860
                                     " a mirror node", errors.ECODE_INVAL)
6861
      elif self.op.snode:
6862
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6863
                        " template")
6864
        self.op.snode = None
6865

    
6866
    self._cds = _GetClusterDomainSecret()
6867

    
6868
    if self.op.mode == constants.INSTANCE_IMPORT:
6869
      # On import force_variant must be True, because if we forced it at
6870
      # initial install, our only chance when importing it back is that it
6871
      # works again!
6872
      self.op.force_variant = True
6873

    
6874
      if self.op.no_install:
6875
        self.LogInfo("No-installation mode has no effect during import")
6876

    
6877
    elif self.op.mode == constants.INSTANCE_CREATE:
6878
      if self.op.os_type is None:
6879
        raise errors.OpPrereqError("No guest OS specified",
6880
                                   errors.ECODE_INVAL)
6881
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6882
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6883
                                   " installation" % self.op.os_type,
6884
                                   errors.ECODE_STATE)
6885
      if self.op.disk_template is None:
6886
        raise errors.OpPrereqError("No disk template specified",
6887
                                   errors.ECODE_INVAL)
6888

    
6889
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6890
      # Check handshake to ensure both clusters have the same domain secret
6891
      src_handshake = self.op.source_handshake
6892
      if not src_handshake:
6893
        raise errors.OpPrereqError("Missing source handshake",
6894
                                   errors.ECODE_INVAL)
6895

    
6896
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6897
                                                           src_handshake)
6898
      if errmsg:
6899
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6900
                                   errors.ECODE_INVAL)
6901

    
6902
      # Load and check source CA
6903
      self.source_x509_ca_pem = self.op.source_x509_ca
6904
      if not self.source_x509_ca_pem:
6905
        raise errors.OpPrereqError("Missing source X509 CA",
6906
                                   errors.ECODE_INVAL)
6907

    
6908
      try:
6909
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6910
                                                    self._cds)
6911
      except OpenSSL.crypto.Error, err:
6912
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6913
                                   (err, ), errors.ECODE_INVAL)
6914

    
6915
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6916
      if errcode is not None:
6917
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6918
                                   errors.ECODE_INVAL)
6919

    
6920
      self.source_x509_ca = cert
6921

    
6922
      src_instance_name = self.op.source_instance_name
6923
      if not src_instance_name:
6924
        raise errors.OpPrereqError("Missing source instance name",
6925
                                   errors.ECODE_INVAL)
6926

    
6927
      self.source_instance_name = \
6928
          netutils.GetHostname(name=src_instance_name).name
6929

    
6930
    else:
6931
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6932
                                 self.op.mode, errors.ECODE_INVAL)
6933

    
6934
  def ExpandNames(self):
6935
    """ExpandNames for CreateInstance.
6936

6937
    Figure out the right locks for instance creation.
6938

6939
    """
6940
    self.needed_locks = {}
6941

    
6942
    instance_name = self.op.instance_name
6943
    # this is just a preventive check, but someone might still add this
6944
    # instance in the meantime, and creation will fail at lock-add time
6945
    if instance_name in self.cfg.GetInstanceList():
6946
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6947
                                 instance_name, errors.ECODE_EXISTS)
6948

    
6949
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6950

    
6951
    if self.op.iallocator:
6952
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6953
    else:
6954
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6955
      nodelist = [self.op.pnode]
6956
      if self.op.snode is not None:
6957
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6958
        nodelist.append(self.op.snode)
6959
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6960

    
6961
    # in case of import lock the source node too
6962
    if self.op.mode == constants.INSTANCE_IMPORT:
6963
      src_node = self.op.src_node
6964
      src_path = self.op.src_path
6965

    
6966
      if src_path is None:
6967
        self.op.src_path = src_path = self.op.instance_name
6968

    
6969
      if src_node is None:
6970
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6971
        self.op.src_node = None
6972
        if os.path.isabs(src_path):
6973
          raise errors.OpPrereqError("Importing an instance from an absolute"
6974
                                     " path requires a source node option.",
6975
                                     errors.ECODE_INVAL)
6976
      else:
6977
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6978
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6979
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6980
        if not os.path.isabs(src_path):
6981
          self.op.src_path = src_path = \
6982
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6983

    
6984
  def _RunAllocator(self):
6985
    """Run the allocator based on input opcode.
6986

6987
    """
6988
    nics = [n.ToDict() for n in self.nics]
6989
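    # Describe the new instance to the iallocator plugin (allocation mode);
    # the plugin chooses the primary node and, for mirrored disk templates,
    # the secondary node as well.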
    ial = IAllocator(self.cfg, self.rpc,
6990
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6991
                     name=self.op.instance_name,
6992
                     disk_template=self.op.disk_template,
6993
                     tags=[],
6994
                     os=self.op.os_type,
6995
                     vcpus=self.be_full[constants.BE_VCPUS],
6996
                     mem_size=self.be_full[constants.BE_MEMORY],
6997
                     disks=self.disks,
6998
                     nics=nics,
6999
                     hypervisor=self.op.hypervisor,
7000
                     )
7001

    
7002
    ial.Run(self.op.iallocator)
7003

    
7004
    if not ial.success:
7005
      raise errors.OpPrereqError("Can't compute nodes using"
7006
                                 " iallocator '%s': %s" %
7007
                                 (self.op.iallocator, ial.info),
7008
                                 errors.ECODE_NORES)
7009
    if len(ial.result) != ial.required_nodes:
7010
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7011
                                 " of nodes (%s), required %s" %
7012
                                 (self.op.iallocator, len(ial.result),
7013
                                  ial.required_nodes), errors.ECODE_FAULT)
7014
    self.op.pnode = ial.result[0]
7015
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7016
                 self.op.instance_name, self.op.iallocator,
7017
                 utils.CommaJoin(ial.result))
7018
    if ial.required_nodes == 2:
7019
      self.op.snode = ial.result[1]
7020

    
7021
  def BuildHooksEnv(self):
7022
    """Build hooks env.
7023

7024
    This runs on master, primary and secondary nodes of the instance.
7025

7026
    """
7027
    env = {
7028
      "ADD_MODE": self.op.mode,
7029
      }
7030
    if self.op.mode == constants.INSTANCE_IMPORT:
7031
      env["SRC_NODE"] = self.op.src_node
7032
      env["SRC_PATH"] = self.op.src_path
7033
      env["SRC_IMAGES"] = self.src_images
7034

    
7035
    env.update(_BuildInstanceHookEnv(
7036
      name=self.op.instance_name,
7037
      primary_node=self.op.pnode,
7038
      secondary_nodes=self.secondaries,
7039
      status=self.op.start,
7040
      os_type=self.op.os_type,
7041
      memory=self.be_full[constants.BE_MEMORY],
7042
      vcpus=self.be_full[constants.BE_VCPUS],
7043
      nics=_NICListToTuple(self, self.nics),
7044
      disk_template=self.op.disk_template,
7045
      disks=[(d["size"], d["mode"]) for d in self.disks],
7046
      bep=self.be_full,
7047
      hvp=self.hv_full,
7048
      hypervisor_name=self.op.hypervisor,
7049
    ))
7050

    
7051
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7052
          self.secondaries)
7053
    return env, nl, nl
7054

    
7055
  def _ReadExportInfo(self):
7056
    """Reads the export information from disk.
7057

7058
    It will override the opcode source node and path with the actual
7059
    information, if these two were not specified before.
7060

7061
    @return: the export information
7062

7063
    """
7064
    assert self.op.mode == constants.INSTANCE_IMPORT
7065

    
7066
    src_node = self.op.src_node
7067
    src_path = self.op.src_path
7068

    
7069
    if src_node is None:
7070
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7071
      exp_list = self.rpc.call_export_list(locked_nodes)
7072
      found = False
7073
      for node in exp_list:
7074
        if exp_list[node].fail_msg:
7075
          continue
7076
        if src_path in exp_list[node].payload:
7077
          found = True
7078
          self.op.src_node = src_node = node
7079
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7080
                                                       src_path)
7081
          break
7082
      if not found:
7083
        raise errors.OpPrereqError("No export found for relative path %s" %
7084
                                    src_path, errors.ECODE_INVAL)
7085

    
7086
    _CheckNodeOnline(self, src_node)
7087
    result = self.rpc.call_export_info(src_node, src_path)
7088
    result.Raise("No export or invalid export found in dir %s" % src_path)
7089

    
7090
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7091
    if not export_info.has_section(constants.INISECT_EXP):
7092
      raise errors.ProgrammerError("Corrupted export config",
7093
                                   errors.ECODE_ENVIRON)
7094

    
7095
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7096
    if (int(ei_version) != constants.EXPORT_VERSION):
7097
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7098
                                 (ei_version, constants.EXPORT_VERSION),
7099
                                 errors.ECODE_ENVIRON)
7100
    return export_info
7101

    
7102
  def _ReadExportParams(self, einfo):
7103
    """Use export parameters as defaults.
7104

7105
    In case the opcode doesn't specify (as in override) some instance
7106
    parameters, then try to use them from the export information, if
7107
    that declares them.
7108

7109
    """
7110
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7111

    
7112
    if self.op.disk_template is None:
7113
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7114
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7115
                                          "disk_template")
7116
      else:
7117
        raise errors.OpPrereqError("No disk template specified and the export"
7118
                                   " is missing the disk_template information",
7119
                                   errors.ECODE_INVAL)
7120

    
7121
    if not self.op.disks:
7122
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7123
        disks = []
7124
        # TODO: import the disk iv_name too
7125
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7126
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7127
          disks.append({"size": disk_sz})
7128
        self.op.disks = disks
7129
      else:
7130
        raise errors.OpPrereqError("No disk info specified and the export"
7131
                                   " is missing the disk information",
7132
                                   errors.ECODE_INVAL)
7133

    
7134
    if (not self.op.nics and
7135
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7136
      nics = []
7137
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7138
        ndict = {}
7139
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7140
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7141
          ndict[name] = v
7142
        nics.append(ndict)
7143
      self.op.nics = nics
7144

    
7145
    if (self.op.hypervisor is None and
7146
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7147
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7148
    if einfo.has_section(constants.INISECT_HYP):
7149
      # use the export parameters but do not override the ones
7150
      # specified by the user
7151
      for name, value in einfo.items(constants.INISECT_HYP):
7152
        if name not in self.op.hvparams:
7153
          self.op.hvparams[name] = value
7154

    
7155
    if einfo.has_section(constants.INISECT_BEP):
7156
      # use the parameters, without overriding
7157
      for name, value in einfo.items(constants.INISECT_BEP):
7158
        if name not in self.op.beparams:
7159
          self.op.beparams[name] = value
7160
    else:
7161
      # try to read the parameters old style, from the main section
7162
      for name in constants.BES_PARAMETERS:
7163
        if (name not in self.op.beparams and
7164
            einfo.has_option(constants.INISECT_INS, name)):
7165
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7166

    
7167
    if einfo.has_section(constants.INISECT_OSP):
7168
      # use the parameters, without overriding
7169
      for name, value in einfo.items(constants.INISECT_OSP):
7170
        if name not in self.op.osparams:
7171
          self.op.osparams[name] = value
7172

    
7173
  def _RevertToDefaults(self, cluster):
7174
    """Revert the instance parameters to the default values.
7175

7176
    """
7177
    # hvparams
7178
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7179
    for name in self.op.hvparams.keys():
7180
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7181
        del self.op.hvparams[name]
7182
    # beparams
7183
    be_defs = cluster.SimpleFillBE({})
7184
    for name in self.op.beparams.keys():
7185
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7186
        del self.op.beparams[name]
7187
    # nic params
7188
    nic_defs = cluster.SimpleFillNIC({})
7189
    for nic in self.op.nics:
7190
      for name in constants.NICS_PARAMETERS:
7191
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7192
          del nic[name]
7193
    # osparams
7194
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7195
    for name in self.op.osparams.keys():
7196
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7197
        del self.op.osparams[name]
7198

    
7199
  def CheckPrereq(self):
7200
    """Check prerequisites.
7201

7202
    """
7203
    if self.op.mode == constants.INSTANCE_IMPORT:
7204
      export_info = self._ReadExportInfo()
7205
      self._ReadExportParams(export_info)
7206

    
7207
    if (not self.cfg.GetVGName() and
7208
        self.op.disk_template not in constants.DTS_NOT_LVM):
7209
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7210
                                 " instances", errors.ECODE_STATE)
7211

    
7212
    if self.op.hypervisor is None:
7213
      self.op.hypervisor = self.cfg.GetHypervisorType()
7214

    
7215
    cluster = self.cfg.GetClusterInfo()
7216
    enabled_hvs = cluster.enabled_hypervisors
7217
    if self.op.hypervisor not in enabled_hvs:
7218
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7219
                                 " cluster (%s)" % (self.op.hypervisor,
7220
                                  ",".join(enabled_hvs)),
7221
                                 errors.ECODE_STATE)
7222

    
7223
    # check hypervisor parameter syntax (locally)
7224
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7225
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7226
                                      self.op.hvparams)
7227
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7228
    hv_type.CheckParameterSyntax(filled_hvp)
7229
    self.hv_full = filled_hvp
7230
    # check that we don't specify global parameters on an instance
7231
    _CheckGlobalHvParams(self.op.hvparams)
7232

    
7233
    # fill and remember the beparams dict
7234
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7235
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7236

    
7237
    # build os parameters
7238
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7239

    
7240
    # now that hvp/bep are in final format, let's reset to defaults,
7241
    # if told to do so
7242
    if self.op.identify_defaults:
7243
      self._RevertToDefaults(cluster)
7244

    
7245
    # NIC buildup
7246
    self.nics = []
7247
    for idx, nic in enumerate(self.op.nics):
7248
      nic_mode_req = nic.get("mode", None)
7249
      nic_mode = nic_mode_req
7250
      if nic_mode is None:
7251
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7252

    
7253
      # in routed mode, for the first nic, the default ip is 'auto'
7254
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7255
        default_ip_mode = constants.VALUE_AUTO
7256
      else:
7257
        default_ip_mode = constants.VALUE_NONE
7258

    
7259
      # ip validity checks
7260
      ip = nic.get("ip", default_ip_mode)
7261
      if ip is None or ip.lower() == constants.VALUE_NONE:
7262
        nic_ip = None
7263
      elif ip.lower() == constants.VALUE_AUTO:
7264
        if not self.op.name_check:
7265
          raise errors.OpPrereqError("IP address set to auto but name checks"
7266
                                     " have been skipped",
7267
                                     errors.ECODE_INVAL)
7268
        nic_ip = self.hostname1.ip
7269
      else:
7270
        if not netutils.IPAddress.IsValid(ip):
7271
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7272
                                     errors.ECODE_INVAL)
7273
        nic_ip = ip
7274

    
7275
      # TODO: check the ip address for uniqueness
7276
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7277
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7278
                                   errors.ECODE_INVAL)
7279

    
7280
      # MAC address verification
7281
      mac = nic.get("mac", constants.VALUE_AUTO)
7282
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7283
        mac = utils.NormalizeAndValidateMac(mac)
7284

    
7285
        try:
7286
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7287
        except errors.ReservationError:
7288
          raise errors.OpPrereqError("MAC address %s already in use"
7289
                                     " in cluster" % mac,
7290
                                     errors.ECODE_NOTUNIQUE)
7291

    
7292
      # bridge verification
7293
      bridge = nic.get("bridge", None)
7294
      link = nic.get("link", None)
7295
      if bridge and link:
7296
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7297
                                   " at the same time", errors.ECODE_INVAL)
7298
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7299
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7300
                                   errors.ECODE_INVAL)
7301
      elif bridge:
7302
        link = bridge
7303

    
7304
      nicparams = {}
7305
      if nic_mode_req:
7306
        nicparams[constants.NIC_MODE] = nic_mode_req
7307
      if link:
7308
        nicparams[constants.NIC_LINK] = link
7309

    
7310
      check_params = cluster.SimpleFillNIC(nicparams)
7311
      objects.NIC.CheckParameterSyntax(check_params)
7312
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7313

    
7314
    # disk checks/pre-build
7315
    self.disks = []
7316
    for disk in self.op.disks:
7317
      mode = disk.get("mode", constants.DISK_RDWR)
7318
      if mode not in constants.DISK_ACCESS_SET:
7319
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7320
                                   mode, errors.ECODE_INVAL)
7321
      size = disk.get("size", None)
7322
      if size is None:
7323
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7324
      try:
7325
        size = int(size)
7326
      except (TypeError, ValueError):
7327
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7328
                                   errors.ECODE_INVAL)
7329
      vg = disk.get("vg", self.cfg.GetVGName())
7330
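      # normalized internal representation of a disk, for example
      # {"size": 10240, "mode": "rw", "vg": "xenvg"} (sizes in MiB; the
      # values shown here are purely illustrative)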
      new_disk = {"size": size, "mode": mode, "vg": vg}
7331
      if "adopt" in disk:
7332
        new_disk["adopt"] = disk["adopt"]
7333
      self.disks.append(new_disk)
7334

    
7335
    if self.op.mode == constants.INSTANCE_IMPORT:
7336

    
7337
      # Check that the new instance doesn't have less disks than the export
7338
      instance_disks = len(self.disks)
7339
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7340
      if instance_disks < export_disks:
7341
        raise errors.OpPrereqError("Not enough disks to import."
7342
                                   " (instance: %d, export: %d)" %
7343
                                   (instance_disks, export_disks),
7344
                                   errors.ECODE_INVAL)
7345

    
7346
      disk_images = []
7347
      for idx in range(export_disks):
7348
        option = 'disk%d_dump' % idx
7349
        if export_info.has_option(constants.INISECT_INS, option):
7350
          # FIXME: are the old os-es, disk sizes, etc. useful?
7351
          export_name = export_info.get(constants.INISECT_INS, option)
7352
          image = utils.PathJoin(self.op.src_path, export_name)
7353
          disk_images.append(image)
7354
        else:
7355
          disk_images.append(False)
7356

    
7357
      self.src_images = disk_images
7358

    
7359
      old_name = export_info.get(constants.INISECT_INS, 'name')
7360
      try:
7361
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7362
      except (TypeError, ValueError), err:
7363
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7364
                                   " an integer: %s" % str(err),
7365
                                   errors.ECODE_STATE)
7366
      if self.op.instance_name == old_name:
7367
        for idx, nic in enumerate(self.nics):
7368
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7369
            nic_mac_ini = 'nic%d_mac' % idx
7370
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7371

    
7372
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7373

    
7374
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7375
    if self.op.ip_check:
7376
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7377
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7378
                                   (self.check_ip, self.op.instance_name),
7379
                                   errors.ECODE_NOTUNIQUE)
7380

    
7381
    #### mac address generation
7382
    # By generating here the mac address both the allocator and the hooks get
7383
    # the real final mac address rather than the 'auto' or 'generate' value.
7384
    # There is a race condition between the generation and the instance object
7385
    # creation, which means that we know the mac is valid now, but we're not
7386
    # sure it will be when we actually add the instance. If things go bad
7387
    # adding the instance will abort because of a duplicate mac, and the
7388
    # creation job will fail.
7389
    for nic in self.nics:
7390
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7391
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7392

    
7393
    #### allocator run
7394

    
7395
    if self.op.iallocator is not None:
7396
      self._RunAllocator()
7397

    
7398
    #### node related checks
7399

    
7400
    # check primary node
7401
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7402
    assert self.pnode is not None, \
7403
      "Cannot retrieve locked node %s" % self.op.pnode
7404
    if pnode.offline:
7405
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7406
                                 pnode.name, errors.ECODE_STATE)
7407
    if pnode.drained:
7408
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7409
                                 pnode.name, errors.ECODE_STATE)
7410
    if not pnode.vm_capable:
7411
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7412
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7413

    
7414
    self.secondaries = []
7415

    
7416
    # mirror node verification
7417
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7418
      if self.op.snode == pnode.name:
7419
        raise errors.OpPrereqError("The secondary node cannot be the"
7420
                                   " primary node.", errors.ECODE_INVAL)
7421
      _CheckNodeOnline(self, self.op.snode)
7422
      _CheckNodeNotDrained(self, self.op.snode)
7423
      _CheckNodeVmCapable(self, self.op.snode)
7424
      self.secondaries.append(self.op.snode)
7425

    
7426
    nodenames = [pnode.name] + self.secondaries
7427

    
7428
    if not self.adopt_disks:
7429
      # Check lv size requirements, if not adopting
7430
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7431
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7432

    
7433
    else: # instead, we must check the adoption data
7434
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7435
      if len(all_lvs) != len(self.disks):
7436
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7437
                                   errors.ECODE_INVAL)
7438
      for lv_name in all_lvs:
7439
        try:
7440
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
7442
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7443
        except errors.ReservationError:
7444
          raise errors.OpPrereqError("LV named %s used by another instance" %
7445
                                     lv_name, errors.ECODE_NOTUNIQUE)
7446

    
7447
      vg_names = self.rpc.call_vg_list([pnode.name])
7448
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7449

    
7450
      node_lvs = self.rpc.call_lv_list([pnode.name],
7451
                                       vg_names[pnode.name].payload.keys()
7452
                                      )[pnode.name]
7453
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7454
      node_lvs = node_lvs.payload
7455

    
7456
      delta = all_lvs.difference(node_lvs.keys())
7457
      if delta:
7458
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7459
                                   utils.CommaJoin(delta),
7460
                                   errors.ECODE_INVAL)
7461
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7462
      if online_lvs:
7463
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7464
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7465
                                   errors.ECODE_STATE)
7466
      # update the size of disk based on what is found
7467
      for dsk in self.disks:
7468
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7469

    
7470
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7471

    
7472
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7473
    # check OS parameters (remotely)
7474
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7475

    
7476
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7477

    
7478
    # memory check on primary node
7479
    if self.op.start:
7480
      _CheckNodeFreeMemory(self, self.pnode.name,
7481
                           "creating instance %s" % self.op.instance_name,
7482
                           self.be_full[constants.BE_MEMORY],
7483
                           self.op.hypervisor)
7484

    
7485
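    # remember the involved nodes; this is what a dry-run of the opcode
    # reports back to the caller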
    self.dry_run_result = list(nodenames)
7486

    
7487
  def Exec(self, feedback_fn):
7488
    """Create and add the instance to the cluster.
7489

7490
    """
7491
    instance = self.op.instance_name
7492
    pnode_name = self.pnode.name
7493

    
7494
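    # hypervisors in HTS_REQ_PORT (those exposing a network console, e.g.
    # VNC) need a cluster-wide unique TCP port reserved in the config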
    ht_kind = self.op.hypervisor
7495
    if ht_kind in constants.HTS_REQ_PORT:
7496
      network_port = self.cfg.AllocatePort()
7497
    else:
7498
      network_port = None
7499

    
7500
    if constants.ENABLE_FILE_STORAGE:
7501
      # this is needed because os.path.join does not accept None arguments
7502
      if self.op.file_storage_dir is None:
7503
        string_file_storage_dir = ""
7504
      else:
7505
        string_file_storage_dir = self.op.file_storage_dir
7506

    
7507
      # build the full file storage dir path
7508
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7509
                                        string_file_storage_dir, instance)
7510
    else:
7511
      file_storage_dir = ""
7512

    
7513
    disks = _GenerateDiskTemplate(self,
7514
                                  self.op.disk_template,
7515
                                  instance, pnode_name,
7516
                                  self.secondaries,
7517
                                  self.disks,
7518
                                  file_storage_dir,
7519
                                  self.op.file_driver,
7520
                                  0,
7521
                                  feedback_fn)
7522

    
7523
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7524
                            primary_node=pnode_name,
7525
                            nics=self.nics, disks=disks,
7526
                            disk_template=self.op.disk_template,
7527
                            admin_up=False,
7528
                            network_port=network_port,
7529
                            beparams=self.op.beparams,
7530
                            hvparams=self.op.hvparams,
7531
                            hypervisor=self.op.hypervisor,
7532
                            osparams=self.op.osparams,
7533
                            )
7534

    
7535
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
7547
    else:
7548
      feedback_fn("* creating instance disks...")
7549
      try:
7550
        _CreateDisks(self, iobj)
7551
      except errors.OpExecError:
7552
        self.LogWarning("Device creation failed, reverting...")
7553
        try:
7554
          _RemoveDisks(self, iobj)
7555
        finally:
7556
          self.cfg.ReleaseDRBDMinors(instance)
7557
          raise
7558

    
7559
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7560
        feedback_fn("* wiping instance disks...")
7561
        try:
7562
          _WipeDisks(self, iobj)
7563
        except errors.OpExecError:
7564
          self.LogWarning("Device wiping failed, reverting...")
7565
          try:
7566
            _RemoveDisks(self, iobj)
7567
          finally:
7568
            self.cfg.ReleaseDRBDMinors(instance)
7569
            raise
7570

    
7571
    feedback_fn("adding instance %s to cluster config" % instance)
7572

    
7573
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7574

    
7575
    # Declare that we don't want to remove the instance lock anymore, as we've
7576
    # added the instance to the config
7577
    del self.remove_locks[locking.LEVEL_INSTANCE]
7578
    # Unlock all the nodes
7579
    if self.op.mode == constants.INSTANCE_IMPORT:
7580
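      # for imports, keep the source node locked: the disk data is still
      # copied from it further down in this function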
      nodes_keep = [self.op.src_node]
7581
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7582
                       if node != self.op.src_node]
7583
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7584
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7585
    else:
7586
      self.context.glm.release(locking.LEVEL_NODE)
7587
      del self.acquired_locks[locking.LEVEL_NODE]
7588

    
7589
    if self.op.wait_for_sync:
7590
      disk_abort = not _WaitForSync(self, iobj)
7591
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7592
      # make sure the disks are not degraded (still sync-ing is ok)
7593
      time.sleep(15)
7594
      feedback_fn("* checking mirrors status")
7595
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7596
    else:
7597
      disk_abort = False
7598

    
7599
    if disk_abort:
7600
      _RemoveDisks(self, iobj)
7601
      self.cfg.RemoveInstance(iobj.name)
7602
      # Make sure the instance lock gets removed
7603
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7604
      raise errors.OpExecError("There are some degraded disks for"
7605
                               " this instance")
7606

    
7607
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7608
      if self.op.mode == constants.INSTANCE_CREATE:
7609
        if not self.op.no_install:
7610
          feedback_fn("* running the instance OS create scripts...")
7611
          # FIXME: pass debug option from opcode to backend
7612
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7613
                                                 self.op.debug_level)
7614
          result.Raise("Could not add os for instance %s"
7615
                       " on node %s" % (instance, pnode_name))
7616

    
7617
      elif self.op.mode == constants.INSTANCE_IMPORT:
7618
        feedback_fn("* running the instance OS import scripts...")
7619

    
7620
        transfers = []
7621

    
7622
        for idx, image in enumerate(self.src_images):
7623
          if not image:
7624
            continue
7625

    
7626
          # FIXME: pass debug option from opcode to backend
7627
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7628
                                             constants.IEIO_FILE, (image, ),
7629
                                             constants.IEIO_SCRIPT,
7630
                                             (iobj.disks[idx], idx),
7631
                                             None)
7632
          transfers.append(dt)
7633

    
7634
        import_result = \
7635
          masterd.instance.TransferInstanceData(self, feedback_fn,
7636
                                                self.op.src_node, pnode_name,
7637
                                                self.pnode.secondary_ip,
7638
                                                iobj, transfers)
7639
        if not compat.all(import_result):
7640
          self.LogWarning("Some disks for instance %s on node %s were not"
7641
                          " imported successfully" % (instance, pnode_name))
7642

    
7643
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7644
        feedback_fn("* preparing remote import...")
7645
        # The source cluster will stop the instance before attempting to make a
7646
        # connection. In some cases stopping an instance can take a long time,
7647
        # hence the shutdown timeout is added to the connection timeout.
7648
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7649
                           self.op.source_shutdown_timeout)
7650
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7651

    
7652
        assert iobj.primary_node == self.pnode.name
7653
        disk_results = \
7654
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7655
                                        self.source_x509_ca,
7656
                                        self._cds, timeouts)
7657
        if not compat.all(disk_results):
7658
          # TODO: Should the instance still be started, even if some disks
7659
          # failed to import (valid for local imports, too)?
7660
          self.LogWarning("Some disks for instance %s on node %s were not"
7661
                          " imported successfully" % (instance, pnode_name))
7662

    
7663
        # Run rename script on newly imported instance
7664
        assert iobj.name == instance
7665
        feedback_fn("Running rename script for %s" % instance)
7666
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7667
                                                   self.source_instance_name,
7668
                                                   self.op.debug_level)
7669
        if result.fail_msg:
7670
          self.LogWarning("Failed to run rename script for %s on node"
7671
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7672

    
7673
      else:
7674
        # also checked in the prereq part
7675
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7676
                                     % self.op.mode)
7677

    
7678
    if self.op.start:
7679
      iobj.admin_up = True
7680
      self.cfg.Update(iobj, feedback_fn)
7681
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7682
      feedback_fn("* starting instance...")
7683
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7684
      result.Raise("Could not start instance")
7685

    
7686
    return list(iobj.all_nodes)
7687

    
7688

    
7689
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console = hyper.GetInstanceConsole(instance, hvparams, beparams)

    assert console.instance == instance.name
    assert console.Validate()

    return console.ToDict()
7746

    
7747

    
7748
class LUReplaceDisks(LogicalUnit):
7749
  """Replace the disks of an instance.
7750

7751
  """
7752
  HPATH = "mirrors-replace"
7753
  HTYPE = constants.HTYPE_INSTANCE
7754
  REQ_BGL = False
7755

    
7756
  def CheckArguments(self):
7757
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7758
                                  self.op.iallocator)
7759

    
7760
  def ExpandNames(self):
7761
    self._ExpandAndLockInstance()
7762

    
7763
    if self.op.iallocator is not None:
7764
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7765

    
7766
    elif self.op.remote_node is not None:
7767
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7768
      self.op.remote_node = remote_node
7769

    
7770
      # Warning: do not remove the locking of the new secondary here
7771
      # unless DRBD8.AddChildren is changed to work in parallel;
7772
      # currently it doesn't since parallel invocations of
7773
      # FindUnusedMinor will conflict
7774
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7775
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7776

    
7777
    else:
7778
      self.needed_locks[locking.LEVEL_NODE] = []
7779
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7780

    
7781
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7782
                                   self.op.iallocator, self.op.remote_node,
7783
                                   self.op.disks, False, self.op.early_release)
7784

    
7785
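    # the actual work is delegated to the tasklet: the LU framework calls
    # CheckPrereq and Exec on TLReplaceDisks on our behalf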
    self.tasklets = [self.replacer]
7786

    
7787
  def DeclareLocks(self, level):
7788
    # If we're not already locking all nodes in the set we have to declare the
7789
    # instance's primary/secondary nodes.
7790
    if (level == locking.LEVEL_NODE and
7791
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7792
      self._LockInstancesNodes()
7793

    
7794
  def BuildHooksEnv(self):
7795
    """Build hooks env.
7796

7797
    This runs on the master, the primary and all the secondaries.
7798

7799
    """
7800
    instance = self.replacer.instance
7801
    env = {
7802
      "MODE": self.op.mode,
7803
      "NEW_SECONDARY": self.op.remote_node,
7804
      "OLD_SECONDARY": instance.secondary_nodes[0],
7805
      }
7806
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7807
    nl = [
7808
      self.cfg.GetMasterNode(),
7809
      instance.primary_node,
7810
      ]
7811
    if self.op.remote_node is not None:
7812
      nl.append(self.op.remote_node)
7813
    return env, nl, nl
7814

    
7815

    
7816
class TLReplaceDisks(Tasklet):
7817
  """Replaces disks for an instance.
7818

7819
  Note: Locking is not within the scope of this class.
7820

7821
  """
7822
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7823
               disks, delay_iallocator, early_release):
7824
    """Initializes this class.
7825

7826
    """
7827
    Tasklet.__init__(self, lu)
7828

    
7829
    # Parameters
7830
    self.instance_name = instance_name
7831
    self.mode = mode
7832
    self.iallocator_name = iallocator_name
7833
    self.remote_node = remote_node
7834
    self.disks = disks
7835
    self.delay_iallocator = delay_iallocator
7836
    self.early_release = early_release
7837

    
7838
    # Runtime data
7839
    self.instance = None
7840
    self.new_node = None
7841
    self.target_node = None
7842
    self.other_node = None
7843
    self.remote_node_info = None
7844
    self.node_secondary_ip = None
7845

    
7846
  @staticmethod
7847
  def CheckArguments(mode, remote_node, iallocator):
7848
    """Helper function for users of this class.
7849

7850
    """
7851
    # check for valid parameter combination
7852
    if mode == constants.REPLACE_DISK_CHG:
7853
      if remote_node is None and iallocator is None:
7854
        raise errors.OpPrereqError("When changing the secondary either an"
7855
                                   " iallocator script must be used or the"
7856
                                   " new node given", errors.ECODE_INVAL)
7857

    
7858
      if remote_node is not None and iallocator is not None:
7859
        raise errors.OpPrereqError("Give either the iallocator or the new"
7860
                                   " secondary, not both", errors.ECODE_INVAL)
7861

    
7862
    elif remote_node is not None or iallocator is not None:
7863
      # Not replacing the secondary
7864
      raise errors.OpPrereqError("The iallocator and new node options can"
7865
                                 " only be used when changing the"
7866
                                 " secondary node", errors.ECODE_INVAL)
7867

    
7868
  @staticmethod
7869
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7870
    """Compute a new secondary node using an IAllocator.
7871

7872
    """
7873
    ial = IAllocator(lu.cfg, lu.rpc,
7874
                     mode=constants.IALLOCATOR_MODE_RELOC,
7875
                     name=instance_name,
7876
                     relocate_from=relocate_from)
7877

    
7878
    ial.Run(iallocator_name)
7879

    
7880
    if not ial.success:
7881
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7882
                                 " %s" % (iallocator_name, ial.info),
7883
                                 errors.ECODE_NORES)
7884

    
7885
    if len(ial.result) != ial.required_nodes:
7886
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7887
                                 " of nodes (%s), required %s" %
7888
                                 (iallocator_name,
7889
                                  len(ial.result), ial.required_nodes),
7890
                                 errors.ECODE_FAULT)
7891

    
7892
    remote_node_name = ial.result[0]
7893

    
7894
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7895
               instance_name, remote_node_name)
7896

    
7897
    return remote_node_name
7898

    
7899
  def _FindFaultyDisks(self, node_name):
7900
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7901
                                    node_name, True)
7902

    
7903
  def CheckPrereq(self):
7904
    """Check prerequisites.
7905

7906
    This checks that the instance is in the cluster.
7907

7908
    """
7909
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7910
    assert instance is not None, \
7911
      "Cannot retrieve locked instance %s" % self.instance_name
7912

    
7913
    if instance.disk_template != constants.DT_DRBD8:
7914
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7915
                                 " instances", errors.ECODE_INVAL)
7916

    
7917
    if len(instance.secondary_nodes) != 1:
7918
      raise errors.OpPrereqError("The instance has a strange layout,"
7919
                                 " expected one secondary but found %d" %
7920
                                 len(instance.secondary_nodes),
7921
                                 errors.ECODE_FAULT)
7922

    
7923
    if not self.delay_iallocator:
7924
      self._CheckPrereq2()
7925

    
7926
  def _CheckPrereq2(self):
7927
    """Check prerequisites, second part.
7928

7929
    This function should always be part of CheckPrereq. It was separated and is
7930
    now called from Exec because during node evacuation iallocator was only
7931
    called with an unmodified cluster model, not taking planned changes into
7932
    account.
7933

7934
    """
7935
    instance = self.instance
7936
    secondary_node = instance.secondary_nodes[0]
7937

    
7938
    if self.iallocator_name is None:
7939
      remote_node = self.remote_node
7940
    else:
7941
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7942
                                       instance.name, instance.secondary_nodes)
7943

    
7944
    if remote_node is not None:
7945
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7946
      assert self.remote_node_info is not None, \
7947
        "Cannot retrieve locked node %s" % remote_node
7948
    else:
7949
      self.remote_node_info = None
7950

    
7951
    if remote_node == self.instance.primary_node:
7952
      raise errors.OpPrereqError("The specified node is the primary node of"
7953
                                 " the instance.", errors.ECODE_INVAL)
7954

    
7955
    if remote_node == secondary_node:
7956
      raise errors.OpPrereqError("The specified node is already the"
7957
                                 " secondary node of the instance.",
7958
                                 errors.ECODE_INVAL)
7959

    
7960
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7961
                                    constants.REPLACE_DISK_CHG):
7962
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7963
                                 errors.ECODE_INVAL)
7964

    
7965
    if self.mode == constants.REPLACE_DISK_AUTO:
7966
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7967
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7968

    
7969
      if faulty_primary and faulty_secondary:
7970
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7971
                                   " one node and can not be repaired"
7972
                                   " automatically" % self.instance_name,
7973
                                   errors.ECODE_STATE)
7974

    
7975
      if faulty_primary:
7976
        self.disks = faulty_primary
7977
        self.target_node = instance.primary_node
7978
        self.other_node = secondary_node
7979
        check_nodes = [self.target_node, self.other_node]
7980
      elif faulty_secondary:
7981
        self.disks = faulty_secondary
7982
        self.target_node = secondary_node
7983
        self.other_node = instance.primary_node
7984
        check_nodes = [self.target_node, self.other_node]
7985
      else:
7986
        self.disks = []
7987
        check_nodes = []
7988

    
7989
    else:
7990
      # Non-automatic modes
7991
      if self.mode == constants.REPLACE_DISK_PRI:
7992
        self.target_node = instance.primary_node
7993
        self.other_node = secondary_node
7994
        check_nodes = [self.target_node, self.other_node]
7995

    
7996
      elif self.mode == constants.REPLACE_DISK_SEC:
7997
        self.target_node = secondary_node
7998
        self.other_node = instance.primary_node
7999
        check_nodes = [self.target_node, self.other_node]
8000

    
8001
      elif self.mode == constants.REPLACE_DISK_CHG:
8002
        self.new_node = remote_node
8003
        self.other_node = instance.primary_node
8004
        self.target_node = secondary_node
8005
        check_nodes = [self.new_node, self.other_node]
8006

    
8007
        _CheckNodeNotDrained(self.lu, remote_node)
8008
        _CheckNodeVmCapable(self.lu, remote_node)
8009

    
8010
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8011
        assert old_node_info is not None
8012
        if old_node_info.offline and not self.early_release:
8013
          # doesn't make sense to delay the release
8014
          self.early_release = True
8015
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8016
                          " early-release mode", secondary_node)
8017

    
8018
      else:
8019
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8020
                                     self.mode)
8021

    
8022
      # If not specified all disks should be replaced
8023
      if not self.disks:
8024
        self.disks = range(len(self.instance.disks))
8025

    
8026
    for node in check_nodes:
8027
      _CheckNodeOnline(self.lu, node)
8028

    
8029
    # Check whether disks are valid
8030
    for disk_idx in self.disks:
8031
      instance.FindDisk(disk_idx)
8032

    
8033
    # Get secondary node IP addresses
8034
    node_2nd_ip = {}
8035

    
8036
    for node_name in [self.target_node, self.other_node, self.new_node]:
8037
      if node_name is not None:
8038
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8039

    
8040
    self.node_secondary_ip = node_2nd_ip
8041

    
8042
  def Exec(self, feedback_fn):
8043
    """Execute disk replacement.
8044

8045
    This dispatches the disk replacement to the appropriate handler.
8046

8047
    """
8048
    if self.delay_iallocator:
8049
      self._CheckPrereq2()
8050

    
8051
    if not self.disks:
8052
      feedback_fn("No disks need replacement")
8053
      return
8054

    
8055
    feedback_fn("Replacing disk(s) %s for %s" %
8056
                (utils.CommaJoin(self.disks), self.instance.name))
8057

    
8058
    activate_disks = (not self.instance.admin_up)
8059

    
8060
    # Activate the instance disks if we're replacing them on a down instance
8061
    if activate_disks:
8062
      _StartInstanceDisks(self.lu, self.instance, True)
8063

    
8064
    try:
8065
      # Should we replace the secondary node?
8066
      if self.new_node is not None:
8067
        fn = self._ExecDrbd8Secondary
8068
      else:
8069
        fn = self._ExecDrbd8DiskOnly
8070

    
8071
      return fn(feedback_fn)
8072

    
8073
    finally:
8074
      # Deactivate the instance disks if we're replacing them on a
8075
      # down instance
8076
      if activate_disks:
8077
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8078

    
8079
  def _CheckVolumeGroup(self, nodes):
8080
    self.lu.LogInfo("Checking volume groups")
8081

    
8082
    vgname = self.cfg.GetVGName()
8083

    
8084
    # Make sure volume group exists on all involved nodes
8085
    results = self.rpc.call_vg_list(nodes)
8086
    if not results:
8087
      raise errors.OpExecError("Can't list volume groups on the nodes")
8088

    
8089
    for node in nodes:
8090
      res = results[node]
8091
      res.Raise("Error checking node %s" % node)
8092
      if vgname not in res.payload:
8093
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8094
                                 (vgname, node))
8095

    
8096
  def _CheckDisksExistence(self, nodes):
8097
    # Check disk existence
8098
    for idx, dev in enumerate(self.instance.disks):
8099
      if idx not in self.disks:
8100
        continue
8101

    
8102
      for node in nodes:
8103
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8104
        self.cfg.SetDiskID(dev, node)
8105

    
8106
        result = self.rpc.call_blockdev_find(node, dev)
8107

    
8108
        msg = result.fail_msg
8109
        if msg or not result.payload:
8110
          if not msg:
8111
            msg = "disk not found"
8112
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8113
                                   (idx, node, msg))
8114

    
8115
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8116
    for idx, dev in enumerate(self.instance.disks):
8117
      if idx not in self.disks:
8118
        continue
8119

    
8120
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8121
                      (idx, node_name))
8122

    
8123
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8124
                                   ldisk=ldisk):
8125
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8126
                                 " replace disks for instance %s" %
8127
                                 (node_name, self.instance.name))
8128

    
8129
  def _CreateNewStorage(self, node_name):
8130
    vgname = self.cfg.GetVGName()
8131
    iv_names = {}
8132

    
8133
    for idx, dev in enumerate(self.instance.disks):
8134
      if idx not in self.disks:
8135
        continue
8136

    
8137
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8138

    
8139
      self.cfg.SetDiskID(dev, node_name)
8140

    
8141
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8142
      names = _GenerateUniqueNames(self.lu, lv_names)
8143

    
8144
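      # each DRBD8 disk is backed by two LVs: a data LV matching the disk
      # size and a small (128 MiB) metadata LV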
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8145
                             logical_id=(vgname, names[0]))
8146
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8147
                             logical_id=(vgname, names[1]))
8148

    
8149
      new_lvs = [lv_data, lv_meta]
8150
      old_lvs = dev.children
8151
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8152

    
8153
      # we pass force_create=True to force the LVM creation
8154
      for new_lv in new_lvs:
8155
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8156
                        _GetInstanceInfoText(self.instance), False)
8157

    
8158
    return iv_names
8159

    
8160
  def _CheckDevices(self, node_name, iv_names):
8161
    for name, (dev, _, _) in iv_names.iteritems():
8162
      self.cfg.SetDiskID(dev, node_name)
8163

    
8164
      result = self.rpc.call_blockdev_find(node_name, dev)
8165

    
8166
      msg = result.fail_msg
8167
      if msg or not result.payload:
8168
        if not msg:
8169
          msg = "disk not found"
8170
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8171
                                 (name, msg))
8172

    
8173
      if result.payload.is_degraded:
8174
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8175

    
8176
  def _RemoveOldStorage(self, node_name, iv_names):
8177
    for name, (_, old_lvs, _) in iv_names.iteritems():
8178
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8179

    
8180
      for lv in old_lvs:
8181
        self.cfg.SetDiskID(lv, node_name)
8182

    
8183
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8184
        if msg:
8185
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8186
                             hint="remove unused LVs manually")
8187

    
8188
  def _ReleaseNodeLock(self, node_name):
8189
    """Releases the lock for a given node."""
8190
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8191

    
8192
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8193
    """Replace a disk on the primary or secondary for DRBD 8.
8194

8195
    The algorithm for replace is quite complicated:
8196

8197
      1. for each disk to be replaced:
8198

8199
        1. create new LVs on the target node with unique names
8200
        1. detach old LVs from the drbd device
8201
        1. rename old LVs to name_replaced.<time_t>
8202
        1. rename new LVs to old LVs
8203
        1. attach the new LVs (with the old names now) to the drbd device
8204

8205
      1. wait for sync across all devices
8206

8207
      1. for each modified disk:
8208

8209
        1. remove old LVs (which have the name name_replaced.<time_t>)
8210

8211
    Failures are not very well handled.
8212

8213
    """
8214
    steps_total = 6
8215

    
8216
    # Step: check device activation
8217
    self.lu.LogStep(1, steps_total, "Check device existence")
8218
    self._CheckDisksExistence([self.other_node, self.target_node])
8219
    self._CheckVolumeGroup([self.target_node, self.other_node])
8220

    
8221
    # Step: check other node consistency
8222
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8223
    self._CheckDisksConsistency(self.other_node,
8224
                                self.other_node == self.instance.primary_node,
8225
                                False)
8226

    
8227
    # Step: create new storage
8228
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8229
    iv_names = self._CreateNewStorage(self.target_node)
8230

    
8231
    # Step: for each lv, detach+rename*2+attach
8232
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8233
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8234
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8235

    
8236
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8237
                                                     old_lvs)
8238
      result.Raise("Can't detach drbd from local storage on node"
8239
                   " %s for device %s" % (self.target_node, dev.iv_name))
8240
      #dev.children = []
8241
      #cfg.Update(instance)
8242

    
8243
      # ok, we created the new LVs, so now we know we have the needed
8244
      # storage; as such, we proceed on the target node to rename
8245
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8246
      # using the assumption that logical_id == physical_id (which in
8247
      # turn is the unique_id on that node)
8248

    
8249
      # FIXME(iustin): use a better name for the replaced LVs
8250
      temp_suffix = int(time.time())
8251
      ren_fn = lambda d, suff: (d.physical_id[0],
8252
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)
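      # At this point the roles on the target node are swapped: the freshly
      # created LVs carry the original logical/physical IDs and will be
      # re-attached to the drbd device, while the detached old LVs carry the
      # "_replaced-<time_t>" names and are cleaned up by _RemoveOldStorage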

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

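    # Steps 5 and 6 are "remove old storage" and "sync devices"; with
    # early_release the old LVs are removed and the node locks given up
    # before waiting for the resync, otherwise removal happens only after
    # the sync has finished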
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
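      # both IDs reuse the existing shared secret (o_secret); presumably this
      # is what lets the primary, which keeps its current DRBD configuration,
      # accept the new secondary once the network is attached below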

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
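    # in both branches the result is a list of [instance_name, new_node]
    # entries: the explicit-remote-node branch builds it directly, and the
    # iallocator's "mevac" mode is expected to return data of the same shape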
8589
    return result
8590

    
8591

    
8592
class LUGrowDisk(LogicalUnit):
8593
  """Grow a disk of an instance.
8594

8595
  """
8596
  HPATH = "disk-grow"
8597
  HTYPE = constants.HTYPE_INSTANCE
8598
  REQ_BGL = False
8599

    
8600
  def ExpandNames(self):
8601
    self._ExpandAndLockInstance()
8602
    self.needed_locks[locking.LEVEL_NODE] = []
8603
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8604

    
8605
  def DeclareLocks(self, level):
8606
    if level == locking.LEVEL_NODE:
8607
      self._LockInstancesNodes()
8608

    
8609
  def BuildHooksEnv(self):
8610
    """Build hooks env.
8611

8612
    This runs on the master, the primary and all the secondaries.
8613

8614
    """
8615
    env = {
8616
      "DISK": self.op.disk,
8617
      "AMOUNT": self.op.amount,
8618
      }
8619
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8620
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8621
    return env, nl, nl
8622

    
8623
  def CheckPrereq(self):
8624
    """Check prerequisites.
8625

8626
    This checks that the instance is in the cluster.
8627

8628
    """
8629
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8630
    assert instance is not None, \
8631
      "Cannot retrieve locked instance %s" % self.op.instance_name
8632
    nodenames = list(instance.all_nodes)
8633
    for node in nodenames:
8634
      _CheckNodeOnline(self, node)
8635

    
8636
    self.instance = instance
8637

    
8638
    if instance.disk_template not in constants.DTS_GROWABLE:
8639
      raise errors.OpPrereqError("Instance's disk layout does not support"
8640
                                 " growing.", errors.ECODE_INVAL)
8641

    
8642
    self.disk = instance.FindDisk(self.op.disk)
8643

    
8644
    if instance.disk_template != constants.DT_FILE:
8645
      # TODO: check the free disk space for file, when that feature
8646
      # will be supported
8647
      _CheckNodesFreeDiskPerVG(self, nodenames,
8648
                               self.disk.ComputeGrowth(self.op.amount))
8649

    
8650
  def Exec(self, feedback_fn):
8651
    """Execute disk grow.
8652

8653
    """
8654
    instance = self.instance
8655
    disk = self.disk
8656

    
8657
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8658
    if not disks_ok:
8659
      raise errors.OpExecError("Cannot activate block device to grow")
8660

    
8661
    for node in instance.all_nodes:
8662
      self.cfg.SetDiskID(disk, node)
8663
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8664
      result.Raise("Grow request failed to node %s" % node)
8665

    
8666
      # TODO: Rewrite code to work properly
8667
      # DRBD goes into sync mode for a short amount of time after executing the
8668
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8669
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8670
      # time is a work-around.
8671
      time.sleep(5)
8672

    
8673
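    # only after every node has successfully grown the device do we record
    # the new size in the configuration, so a failure above leaves the
    # cluster configuration untouched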
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

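    # flatten the payload into a fixed-order tuple:
    # (dev_path, major, minor, sync_percent, estimated_time,
    #  is_degraded, ldisk_status)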
    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

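    # self.op.disks is a list of (operation, parameters) pairs; purely as a
    # hypothetical illustration, adding a disk would look like
    #   [(constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})]
    # while changing the mode of disk 0 would look like
    #   [(0, {"mode": constants.DISK_RDONLY})]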
    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_NET_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{"size": d.size, "vg": d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
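    # each generated DRBD8 disk has two LV children: children[0] is the data
    # volume (the existing plain LV is renamed to its name below) and
    # children[1] is the DRBD metadata volume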
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]
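    # keep only the data LV (children[0]) of each DRBD disk; the DRBD device
    # itself and its metadata LV (children[1]) are removed from the nodes
    # further down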

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

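  # maps (current disk template, requested disk template) to the conversion
  # routine used by Exec; only plain <-> drbd conversions are supported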
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
9609
    self._ExpandAndLockInstance()
9610

    
9611
    # Lock all nodes for local exports
9612
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9613
      # FIXME: lock only instance primary and destination node
9614
      #
9615
      # Sad but true, for now we have to lock all nodes, as we don't know where
9616
      # the previous export might be, and in this LU we search for it and
9617
      # remove it from its current node. In the future we could fix this by:
9618
      #  - making a tasklet to search (share-lock all), then create the
9619
      #    new one, then one to remove, after
9620
      #  - removing the removal operation altogether
9621
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9622

    
9623
  def DeclareLocks(self, level):
9624
    """Last minute lock declaration."""
9625
    # All nodes are locked anyway, so nothing to do here.
9626

    
9627
  def BuildHooksEnv(self):
9628
    """Build hooks env.
9629

9630
    This will run on the master, primary node and target node.
9631

9632
    """
9633
    env = {
9634
      "EXPORT_MODE": self.op.mode,
9635
      "EXPORT_NODE": self.op.target_node,
9636
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9637
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9638
      # TODO: Generic function for boolean env variables
9639
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9640
      }
9641

    
9642
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9643

    
9644
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9645

    
9646
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9647
      nl.append(self.op.target_node)
9648

    
9649
    return env, nl, nl
9650

    
9651
  def CheckPrereq(self):
9652
    """Check prerequisites.
9653

9654
    This checks that the instance and node names are valid.
9655

9656
    """
9657
    instance_name = self.op.instance_name
9658

    
9659
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9660
    assert self.instance is not None, \
9661
          "Cannot retrieve locked instance %s" % self.op.instance_name
9662
    _CheckNodeOnline(self, self.instance.primary_node)
9663

    
9664
    if (self.op.remove_instance and self.instance.admin_up and
9665
        not self.op.shutdown):
9666
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9667
                                 " down before")
9668

    
9669
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9670
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9671
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9672
      assert self.dst_node is not None
9673

    
9674
      _CheckNodeOnline(self, self.dst_node.name)
9675
      _CheckNodeNotDrained(self, self.dst_node.name)
9676

    
9677
      self._cds = None
9678
      self.dest_disk_info = None
9679
      self.dest_x509_ca = None
9680

    
9681
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9682
      self.dst_node = None
9683

    
9684
      if len(self.op.target_node) != len(self.instance.disks):
9685
        raise errors.OpPrereqError(("Received destination information for %s"
9686
                                    " disks, but instance %s has %s disks") %
9687
                                   (len(self.op.target_node), instance_name,
9688
                                    len(self.instance.disks)),
9689
                                   errors.ECODE_INVAL)
9690

    
9691
      cds = _GetClusterDomainSecret()
9692

    
9693
      # Check X509 key name
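      # The tuple checked below mirrors what LUBackupPrepare returned as
      # "x509_key_name"; a hedged sketch of how it was built there:
      #   salt = utils.GenerateSecret(8)
      #   hmac_digest = utils.Sha1Hmac(cds, key_name, salt=salt)
      #   x509_key_name = (key_name, hmac_digest, salt)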
9694
      try:
9695
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9696
      except (TypeError, ValueError), err:
9697
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9698

    
9699
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9700
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9701
                                   errors.ECODE_INVAL)
9702

    
9703
      # Load and verify CA
9704
      try:
9705
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9706
      except OpenSSL.crypto.Error, err:
9707
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9708
                                   (err, ), errors.ECODE_INVAL)
9709

    
9710
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9711
      if errcode is not None:
9712
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9713
                                   (msg, ), errors.ECODE_INVAL)
9714

    
9715
      self.dest_x509_ca = cert
9716

    
9717
      # Verify target information
9718
      disk_info = []
9719
      for idx, disk_data in enumerate(self.op.target_node):
9720
        try:
9721
          (host, port, magic) = \
9722
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9723
        except errors.GenericError, err:
9724
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9725
                                     (idx, err), errors.ECODE_INVAL)
9726

    
9727
        disk_info.append((host, port, magic))
9728

    
9729
      assert len(disk_info) == len(self.op.target_node)
9730
      self.dest_disk_info = disk_info
9731

    
9732
    else:
9733
      raise errors.ProgrammerError("Unhandled export mode %r" %
9734
                                   self.op.mode)
9735

    
9736
    # instance disk type verification
9737
    # TODO: Implement export support for file-based disks
9738
    for disk in self.instance.disks:
9739
      if disk.dev_type == constants.LD_FILE:
9740
        raise errors.OpPrereqError("Export not supported for instances with"
9741
                                   " file-based disks", errors.ECODE_INVAL)
9742

    
9743
  def _CleanupExports(self, feedback_fn):
9744
    """Removes exports of current instance from all other nodes.
9745

9746
    If an instance in a cluster with nodes A..D was exported to node C, its
9747
    exports will be removed from the nodes A, B and D.
9748

9749
    """
9750
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9751

    
9752
    nodelist = self.cfg.GetNodeList()
9753
    nodelist.remove(self.dst_node.name)
9754

    
9755
    # On one-node clusters the nodelist will be empty after the removal;
    # if we proceed, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
9758
    iname = self.instance.name
9759
    if nodelist:
9760
      feedback_fn("Removing old exports for instance %s" % iname)
9761
      exportlist = self.rpc.call_export_list(nodelist)
9762
      for node in exportlist:
9763
        if exportlist[node].fail_msg:
9764
          continue
9765
        if iname in exportlist[node].payload:
9766
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9767
          if msg:
9768
            self.LogWarning("Could not remove older export for instance %s"
9769
                            " on node %s: %s", iname, node, msg)
9770

    
9771
  def Exec(self, feedback_fn):
9772
    """Export an instance to an image in the cluster.
9773

9774
    """
9775
    assert self.op.mode in constants.EXPORT_MODES
9776

    
9777
    instance = self.instance
9778
    src_node = instance.primary_node
9779

    
9780
    if self.op.shutdown:
9781
      # shutdown the instance, but not the disks
9782
      feedback_fn("Shutting down instance %s" % instance.name)
9783
      result = self.rpc.call_instance_shutdown(src_node, instance,
9784
                                               self.op.shutdown_timeout)
9785
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9786
      result.Raise("Could not shutdown instance %s on"
9787
                   " node %s" % (instance.name, src_node))
9788

    
9789
    # set the disk IDs correctly since call_instance_start needs the
9790
    # correct drbd minor to create the symlinks
9791
    for disk in instance.disks:
9792
      self.cfg.SetDiskID(disk, src_node)
9793

    
9794
    activate_disks = (not instance.admin_up)
9795

    
9796
    if activate_disks:
9797
      # Activate the instance disks if we're exporting a stopped instance
9798
      feedback_fn("Activating disks for %s" % instance.name)
9799
      _StartInstanceDisks(self, instance, None)
9800

    
9801
    try:
9802
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9803
                                                     instance)
9804

    
9805
      helper.CreateSnapshots()
9806
      try:
9807
        if (self.op.shutdown and instance.admin_up and
9808
            not self.op.remove_instance):
9809
          assert not activate_disks
9810
          feedback_fn("Starting instance %s" % instance.name)
9811
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9812
          msg = result.fail_msg
9813
          if msg:
9814
            feedback_fn("Failed to start instance: %s" % msg)
9815
            _ShutdownInstanceDisks(self, instance)
9816
            raise errors.OpExecError("Could not start instance: %s" % msg)
9817

    
9818
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9819
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9820
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9821
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9822
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9823

    
9824
          (key_name, _, _) = self.x509_key_name
9825

    
9826
          dest_ca_pem = \
9827
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9828
                                            self.dest_x509_ca)
9829

    
9830
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9831
                                                     key_name, dest_ca_pem,
9832
                                                     timeouts)
9833
      finally:
9834
        helper.Cleanup()
9835

    
9836
      # Check for backwards compatibility
9837
      assert len(dresults) == len(instance.disks)
9838
      assert compat.all(isinstance(i, bool) for i in dresults), \
9839
             "Not all results are boolean: %r" % dresults
9840

    
9841
    finally:
9842
      if activate_disks:
9843
        feedback_fn("Deactivating disks for %s" % instance.name)
9844
        _ShutdownInstanceDisks(self, instance)
9845

    
9846
    if not (compat.all(dresults) and fin_resu):
9847
      failures = []
9848
      if not fin_resu:
9849
        failures.append("export finalization")
9850
      if not compat.all(dresults):
9851
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9852
                               if not dsk)
9853
        failures.append("disk export: disk(s) %s" % fdsk)
9854

    
9855
      raise errors.OpExecError("Export failed, errors in %s" %
9856
                               utils.CommaJoin(failures))
9857

    
9858
    # At this point, the export was successful, we can cleanup/finish
9859

    
9860
    # Remove instance if requested
9861
    if self.op.remove_instance:
9862
      feedback_fn("Removing instance %s" % instance.name)
9863
      _RemoveInstance(self, feedback_fn, instance,
9864
                      self.op.ignore_remove_failures)
9865

    
9866
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9867
      self._CleanupExports(feedback_fn)
9868

    
9869
    return fin_resu, dresults
9870

    
9871

    
9872
class LURemoveExport(NoHooksLU):
9873
  """Remove exports related to the named instance.
9874

9875
  """
9876
  REQ_BGL = False
9877

    
9878
  def ExpandNames(self):
9879
    self.needed_locks = {}
9880
    # We need all nodes to be locked in order for RemoveExport to work, but we
9881
    # don't need to lock the instance itself, as nothing will happen to it (and
9882
    # we can remove exports also for a removed instance)
9883
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9884

    
9885
  def Exec(self, feedback_fn):
9886
    """Remove any export.
9887

9888
    """
9889
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9890
    # If the instance was not found we'll try with the name that was passed in.
9891
    # This will only work if it was an FQDN, though.
9892
    fqdn_warn = False
9893
    if not instance_name:
9894
      fqdn_warn = True
9895
      instance_name = self.op.instance_name
9896

    
9897
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9898
    exportlist = self.rpc.call_export_list(locked_nodes)
9899
    found = False
9900
    for node in exportlist:
9901
      msg = exportlist[node].fail_msg
9902
      if msg:
9903
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9904
        continue
9905
      if instance_name in exportlist[node].payload:
9906
        found = True
9907
        result = self.rpc.call_export_remove(node, instance_name)
9908
        msg = result.fail_msg
9909
        if msg:
9910
          logging.error("Could not remove export for instance %s"
9911
                        " on node %s: %s", instance_name, node, msg)
9912

    
9913
    if fqdn_warn and not found:
9914
      feedback_fn("Export not found. If trying to remove an export belonging"
9915
                  " to a deleted instance please use its Fully Qualified"
9916
                  " Domain Name.")
9917

    
9918

    
9919
class LUAddGroup(LogicalUnit):
9920
  """Logical unit for creating node groups.
9921

9922
  """
9923
  HPATH = "group-add"
9924
  HTYPE = constants.HTYPE_GROUP
9925
  REQ_BGL = False
9926

    
9927
  def ExpandNames(self):
9928
    # We need the new group's UUID here so that we can create and acquire the
9929
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
9930
    # that it should not check whether the UUID exists in the configuration.
9931
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
9932
    self.needed_locks = {}
9933
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
9934

    
9935
  def CheckPrereq(self):
9936
    """Check prerequisites.
9937

9938
    This checks that the given group name is not an existing node group
9939
    already.
9940

9941
    """
9942
    try:
9943
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
9944
    except errors.OpPrereqError:
9945
      pass
9946
    else:
9947
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
9948
                                 " node group (UUID: %s)" %
9949
                                 (self.op.group_name, existing_uuid),
9950
                                 errors.ECODE_EXISTS)
9951

    
9952
    if self.op.ndparams:
9953
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
9954

    
9955
  def BuildHooksEnv(self):
9956
    """Build hooks env.
9957

9958
    """
9959
    env = {
9960
      "GROUP_NAME": self.op.group_name,
9961
      }
9962
    mn = self.cfg.GetMasterNode()
9963
    return env, [mn], [mn]
9964

    
9965
  def Exec(self, feedback_fn):
9966
    """Add the node group to the cluster.
9967

9968
    """
9969
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
9970
                                  uuid=self.group_uuid,
9971
                                  alloc_policy=self.op.alloc_policy,
9972
                                  ndparams=self.op.ndparams)
9973

    
9974
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
9975
    del self.remove_locks[locking.LEVEL_NODEGROUP]
9976

    
9977

    
9978
class LUAssignGroupNodes(NoHooksLU):
9979
  """Logical unit for assigning nodes to groups.
9980

9981
  """
9982
  REQ_BGL = False
9983

    
9984
  def ExpandNames(self):
9985
    # These raise errors.OpPrereqError on their own:
9986
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
9987
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9988

    
9989
    # We want to lock all the affected nodes and groups. We have readily
9990
    # available the list of nodes, and the *destination* group. To gather the
9991
    # list of "source" groups, we need to fetch node information.
9992
    self.node_data = self.cfg.GetAllNodesInfo()
9993
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
9994
    affected_groups.add(self.group_uuid)
9995

    
9996
    self.needed_locks = {
9997
      locking.LEVEL_NODEGROUP: list(affected_groups),
9998
      locking.LEVEL_NODE: self.op.nodes,
9999
      }
10000

    
10001
  def CheckPrereq(self):
10002
    """Check prerequisites.
10003

10004
    """
10005
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10006
    instance_data = self.cfg.GetAllInstancesInfo()
10007

    
10008
    if self.group is None:
10009
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10010
                               (self.op.group_name, self.group_uuid))
10011

    
10012
    (new_splits, previous_splits) = \
10013
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10014
                                             for node in self.op.nodes],
10015
                                            self.node_data, instance_data)
10016

    
10017
    if new_splits:
10018
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10019

    
10020
      if not self.op.force:
10021
        raise errors.OpExecError("The following instances get split by this"
10022
                                 " change and --force was not given: %s" %
10023
                                 fmt_new_splits)
10024
      else:
10025
        self.LogWarning("This operation will split the following instances: %s",
10026
                        fmt_new_splits)
10027

    
10028
        if previous_splits:
10029
          self.LogWarning("In addition, these already-split instances continue"
10030
                          " to be spit across groups: %s",
10031
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
10032

    
10033
  def Exec(self, feedback_fn):
10034
    """Assign nodes to a new group.
10035

10036
    """
10037
    for node in self.op.nodes:
10038
      self.node_data[node].group = self.group_uuid
10039

    
10040
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10041

    
10042
  @staticmethod
10043
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10044
    """Check for split instances after a node assignment.
10045

10046
    This method considers a series of node assignments as an atomic operation,
10047
    and returns information about split instances after applying the set of
10048
    changes.
10049

10050
    In particular, it returns information about newly split instances, and
10051
    instances that were already split, and remain so after the change.
10052

10053
    Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10054
    considered.
10055

10056
    @type changes: list of (node_name, new_group_uuid) pairs.
10057
    @param changes: list of node assignments to consider.
10058
    @param node_data: a dict with data for all nodes
10059
    @param instance_data: a dict with all instances to consider
10060
    @rtype: a two-tuple
10061
    @return: a list of instances that were previously okay and end up split as a
      consequence of this change, and a list of instances that were previously
      split and that this change does not fix.
10064

10065
    """
10066
    changed_nodes = dict((node, group) for node, group in changes
10067
                         if node_data[node].group != group)
10068

    
10069
    all_split_instances = set()
10070
    previously_split_instances = set()
10071

    
10072
    def InstanceNodes(instance):
10073
      return [instance.primary_node] + list(instance.secondary_nodes)
10074

    
10075
    for inst in instance_data.values():
10076
      if inst.disk_template not in constants.DTS_NET_MIRROR:
10077
        continue
10078

    
10079
      instance_nodes = InstanceNodes(inst)
10080

    
10081
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
10082
        previously_split_instances.add(inst.name)
10083

    
10084
      if len(set(changed_nodes.get(node, node_data[node].group)
10085
                 for node in instance_nodes)) > 1:
10086
        all_split_instances.add(inst.name)
10087

    
10088
    return (list(all_split_instances - previously_split_instances),
10089
            list(previously_split_instances & all_split_instances))
10090

    
10091

    
10092
class _GroupQuery(_QueryBase):
10093

    
10094
  FIELDS = query.GROUP_FIELDS
10095

    
10096
  def ExpandNames(self, lu):
10097
    lu.needed_locks = {}
10098

    
10099
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10100
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10101

    
10102
    if not self.names:
10103
      self.wanted = [name_to_uuid[name]
10104
                     for name in utils.NiceSort(name_to_uuid.keys())]
10105
    else:
10106
      # Accept names to be either names or UUIDs.
10107
      missing = []
10108
      self.wanted = []
10109
      all_uuid = frozenset(self._all_groups.keys())
10110

    
10111
      for name in self.names:
10112
        if name in all_uuid:
10113
          self.wanted.append(name)
10114
        elif name in name_to_uuid:
10115
          self.wanted.append(name_to_uuid[name])
10116
        else:
10117
          missing.append(name)
10118

    
10119
      if missing:
10120
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10121
                                   errors.ECODE_NOENT)
10122

    
10123
  def DeclareLocks(self, lu, level):
10124
    pass
10125

    
10126
  def _GetQueryData(self, lu):
10127
    """Computes the list of node groups and their attributes.
10128

10129
    """
10130
    do_nodes = query.GQ_NODE in self.requested_data
10131
    do_instances = query.GQ_INST in self.requested_data
10132

    
10133
    group_to_nodes = None
10134
    group_to_instances = None
10135

    
10136
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10137
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10138
    # latter GetAllInstancesInfo() is not enough, for we have to go through
10139
    # instance->node. Hence, we will need to process nodes even if we only need
10140
    # instance information.
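    # For a hypothetical cluster the resulting maps would look like:
    #   group_to_nodes = {"uuid-1": ["node1", "node2"]}
    #   group_to_instances = {"uuid-1": ["inst1.example.com"]}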
10141
    if do_nodes or do_instances:
10142
      all_nodes = lu.cfg.GetAllNodesInfo()
10143
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10144
      node_to_group = {}
10145

    
10146
      for node in all_nodes.values():
10147
        if node.group in group_to_nodes:
10148
          group_to_nodes[node.group].append(node.name)
10149
          node_to_group[node.name] = node.group
10150

    
10151
      if do_instances:
10152
        all_instances = lu.cfg.GetAllInstancesInfo()
10153
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
10154

    
10155
        for instance in all_instances.values():
10156
          node = instance.primary_node
10157
          if node in node_to_group:
10158
            group_to_instances[node_to_group[node]].append(instance.name)
10159

    
10160
        if not do_nodes:
10161
          # Do not pass on node information if it was not requested.
10162
          group_to_nodes = None
10163

    
10164
    return query.GroupQueryData([self._all_groups[uuid]
10165
                                 for uuid in self.wanted],
10166
                                group_to_nodes, group_to_instances)
10167

    
10168

    
10169
class LUQueryGroups(NoHooksLU):
10170
  """Logical unit for querying node groups.
10171

10172
  """
10173
  REQ_BGL = False
10174

    
10175
  def CheckArguments(self):
10176
    self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10177

    
10178
  def ExpandNames(self):
10179
    self.gq.ExpandNames(self)
10180

    
10181
  def Exec(self, feedback_fn):
10182
    return self.gq.OldStyleQuery(self)
10183

    
10184

    
10185
class LUSetGroupParams(LogicalUnit):
10186
  """Modifies the parameters of a node group.
10187

10188
  """
10189
  HPATH = "group-modify"
10190
  HTYPE = constants.HTYPE_GROUP
10191
  REQ_BGL = False
10192

    
10193
  def CheckArguments(self):
10194
    all_changes = [
10195
      self.op.ndparams,
10196
      self.op.alloc_policy,
10197
      ]
10198

    
10199
    if all_changes.count(None) == len(all_changes):
10200
      raise errors.OpPrereqError("Please pass at least one modification",
10201
                                 errors.ECODE_INVAL)
10202

    
10203
  def ExpandNames(self):
10204
    # This raises errors.OpPrereqError on its own:
10205
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10206

    
10207
    self.needed_locks = {
10208
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10209
      }
10210

    
10211
  def CheckPrereq(self):
10212
    """Check prerequisites.
10213

10214
    """
10215
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10216

    
10217
    if self.group is None:
10218
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10219
                               (self.op.group_name, self.group_uuid))
10220

    
10221
    if self.op.ndparams:
10222
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10223
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10224
      self.new_ndparams = new_ndparams
10225

    
10226
  def BuildHooksEnv(self):
10227
    """Build hooks env.
10228

10229
    """
10230
    env = {
10231
      "GROUP_NAME": self.op.group_name,
10232
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
10233
      }
10234
    mn = self.cfg.GetMasterNode()
10235
    return env, [mn], [mn]
10236

    
10237
  def Exec(self, feedback_fn):
10238
    """Modifies the node group.
10239

10240
    """
10241
    result = []
10242

    
10243
    if self.op.ndparams:
10244
      self.group.ndparams = self.new_ndparams
10245
      result.append(("ndparams", str(self.group.ndparams)))
10246

    
10247
    if self.op.alloc_policy:
10248
      self.group.alloc_policy = self.op.alloc_policy
10249

    
10250
    self.cfg.Update(self.group, feedback_fn)
10251
    return result
10252

    
10253

    
10254

    
10255
class LURemoveGroup(LogicalUnit):
10256
  HPATH = "group-remove"
10257
  HTYPE = constants.HTYPE_GROUP
10258
  REQ_BGL = False
10259

    
10260
  def ExpandNames(self):
10261
    # This will raise errors.OpPrereqError on its own:
10262
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10263
    self.needed_locks = {
10264
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10265
      }
10266

    
10267
  def CheckPrereq(self):
10268
    """Check prerequisites.
10269

10270
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
10273

10274
    """
10275
    # Verify that the group is empty.
10276
    group_nodes = [node.name
10277
                   for node in self.cfg.GetAllNodesInfo().values()
10278
                   if node.group == self.group_uuid]
10279

    
10280
    if group_nodes:
10281
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
10282
                                 " nodes: %s" %
10283
                                 (self.op.group_name,
10284
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
10285
                                 errors.ECODE_STATE)
10286

    
10287
    # Verify the cluster would not be left group-less.
10288
    if len(self.cfg.GetNodeGroupList()) == 1:
10289
      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10290
                                 " which cannot be left without at least one"
10291
                                 " group" % self.op.group_name,
10292
                                 errors.ECODE_STATE)
10293

    
10294
  def BuildHooksEnv(self):
10295
    """Build hooks env.
10296

10297
    """
10298
    env = {
10299
      "GROUP_NAME": self.op.group_name,
10300
      }
10301
    mn = self.cfg.GetMasterNode()
10302
    return env, [mn], [mn]
10303

    
10304
  def Exec(self, feedback_fn):
10305
    """Remove the node group.
10306

10307
    """
10308
    try:
10309
      self.cfg.RemoveNodeGroup(self.group_uuid)
10310
    except errors.ConfigurationError:
10311
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10312
                               (self.op.group_name, self.group_uuid))
10313

    
10314
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10315

    
10316

    
10317
class LURenameGroup(LogicalUnit):
10318
  HPATH = "group-rename"
10319
  HTYPE = constants.HTYPE_GROUP
10320
  REQ_BGL = False
10321

    
10322
  def ExpandNames(self):
10323
    # This raises errors.OpPrereqError on its own:
10324
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10325

    
10326
    self.needed_locks = {
10327
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10328
      }
10329

    
10330
  def CheckPrereq(self):
10331
    """Check prerequisites.
10332

10333
    This checks that the given old_name exists as a node group, and that
10334
    new_name doesn't.
10335

10336
    """
10337
    try:
10338
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10339
    except errors.OpPrereqError:
10340
      pass
10341
    else:
10342
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10343
                                 " node group (UUID: %s)" %
10344
                                 (self.op.new_name, new_name_uuid),
10345
                                 errors.ECODE_EXISTS)
10346

    
10347
  def BuildHooksEnv(self):
10348
    """Build hooks env.
10349

10350
    """
10351
    env = {
10352
      "OLD_NAME": self.op.old_name,
10353
      "NEW_NAME": self.op.new_name,
10354
      }
10355

    
10356
    mn = self.cfg.GetMasterNode()
10357
    all_nodes = self.cfg.GetAllNodesInfo()
10358
    run_nodes = [mn]
10359
    all_nodes.pop(mn, None)
10360

    
10361
    for node in all_nodes.values():
10362
      if node.group == self.group_uuid:
10363
        run_nodes.append(node.name)
10364

    
10365
    return env, run_nodes, run_nodes
10366

    
10367
  def Exec(self, feedback_fn):
10368
    """Rename the node group.
10369

10370
    """
10371
    group = self.cfg.GetNodeGroup(self.group_uuid)
10372

    
10373
    if group is None:
10374
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10375
                               (self.op.old_name, self.group_uuid))
10376

    
10377
    group.name = self.op.new_name
10378
    self.cfg.Update(group, feedback_fn)
10379

    
10380
    return self.op.new_name
10381

    
10382

    
10383
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10384
  """Generic tags LU.
10385

10386
  This is an abstract class which is the parent of all the other tags LUs.
10387

10388
  """
10389

    
10390
  def ExpandNames(self):
10391
    self.needed_locks = {}
10392
    if self.op.kind == constants.TAG_NODE:
10393
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10394
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10395
    elif self.op.kind == constants.TAG_INSTANCE:
10396
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10397
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10398

    
10399
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10400
    # not possible to acquire the BGL based on opcode parameters)
10401

    
10402
  def CheckPrereq(self):
10403
    """Check prerequisites.
10404

10405
    """
10406
    if self.op.kind == constants.TAG_CLUSTER:
10407
      self.target = self.cfg.GetClusterInfo()
10408
    elif self.op.kind == constants.TAG_NODE:
10409
      self.target = self.cfg.GetNodeInfo(self.op.name)
10410
    elif self.op.kind == constants.TAG_INSTANCE:
10411
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10412
    else:
10413
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10414
                                 str(self.op.kind), errors.ECODE_INVAL)
10415

    
10416

    
10417
class LUGetTags(TagsLU):
10418
  """Returns the tags of a given object.
10419

10420
  """
10421
  REQ_BGL = False
10422

    
10423
  def ExpandNames(self):
10424
    TagsLU.ExpandNames(self)
10425

    
10426
    # Share locks as this is only a read operation
10427
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10428

    
10429
  def Exec(self, feedback_fn):
10430
    """Returns the tag list.
10431

10432
    """
10433
    return list(self.target.GetTags())
10434

    
10435

    
10436
class LUSearchTags(NoHooksLU):
10437
  """Searches the tags for a given pattern.
10438

10439
  """
10440
  REQ_BGL = False
10441

    
10442
  def ExpandNames(self):
10443
    self.needed_locks = {}
10444

    
10445
  def CheckPrereq(self):
10446
    """Check prerequisites.
10447

10448
    This checks the pattern passed for validity by compiling it.
10449

10450
    """
10451
    try:
10452
      self.re = re.compile(self.op.pattern)
10453
    except re.error, err:
10454
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10455
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10456

    
10457
  def Exec(self, feedback_fn):
10458
    """Returns the tag list.
10459

10460
    """
10461
    cfg = self.cfg
10462
    tgts = [("/cluster", cfg.GetClusterInfo())]
10463
    ilist = cfg.GetAllInstancesInfo().values()
10464
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10465
    nlist = cfg.GetAllNodesInfo().values()
10466
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10467
    results = []
10468
    for path, target in tgts:
10469
      for tag in target.GetTags():
10470
        if self.re.search(tag):
10471
          results.append((path, tag))
10472
    return results
10473

    
10474

    
10475
class LUAddTags(TagsLU):
10476
  """Sets a tag on a given object.
10477

10478
  """
10479
  REQ_BGL = False
10480

    
10481
  def CheckPrereq(self):
10482
    """Check prerequisites.
10483

10484
    This checks the type and length of the tag name and value.
10485

10486
    """
10487
    TagsLU.CheckPrereq(self)
10488
    for tag in self.op.tags:
10489
      objects.TaggableObject.ValidateTag(tag)
10490

    
10491
  def Exec(self, feedback_fn):
10492
    """Sets the tag.
10493

10494
    """
10495
    try:
10496
      for tag in self.op.tags:
10497
        self.target.AddTag(tag)
10498
    except errors.TagError, err:
10499
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10500
    self.cfg.Update(self.target, feedback_fn)
10501

    
10502

    
10503
class LUDelTags(TagsLU):
10504
  """Delete a list of tags from a given object.
10505

10506
  """
10507
  REQ_BGL = False
10508

    
10509
  def CheckPrereq(self):
10510
    """Check prerequisites.
10511

10512
    This checks that we have the given tag.
10513

10514
    """
10515
    TagsLU.CheckPrereq(self)
10516
    for tag in self.op.tags:
10517
      objects.TaggableObject.ValidateTag(tag)
10518
    del_tags = frozenset(self.op.tags)
10519
    cur_tags = self.target.GetTags()
10520

    
10521
    diff_tags = del_tags - cur_tags
10522
    if diff_tags:
10523
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10524
      raise errors.OpPrereqError("Tag(s) %s not found" %
10525
                                 (utils.CommaJoin(diff_names), ),
10526
                                 errors.ECODE_NOENT)
10527

    
10528
  def Exec(self, feedback_fn):
10529
    """Remove the tag from the object.
10530

10531
    """
10532
    for tag in self.op.tags:
10533
      self.target.RemoveTag(tag)
10534
    self.cfg.Update(self.target, feedback_fn)
10535

    
10536

    
10537
class LUTestDelay(NoHooksLU):
10538
  """Sleep for a specified amount of time.
10539

10540
  This LU sleeps on the master and/or nodes for a specified amount of
10541
  time.
10542

10543
  """
10544
  REQ_BGL = False
10545

    
10546
  def ExpandNames(self):
10547
    """Expand names and set required locks.
10548

10549
    This expands the node list, if any.
10550

10551
    """
10552
    self.needed_locks = {}
10553
    if self.op.on_nodes:
10554
      # _GetWantedNodes can be used here, but is not always appropriate to use
10555
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10556
      # more information.
10557
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10558
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10559

    
10560
  def _TestDelay(self):
10561
    """Do the actual sleep.
10562

10563
    """
10564
    if self.op.on_master:
10565
      if not utils.TestDelay(self.op.duration):
10566
        raise errors.OpExecError("Error during master delay test")
10567
    if self.op.on_nodes:
10568
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10569
      for node, node_result in result.items():
10570
        node_result.Raise("Failure during rpc call to node %s" % node)
10571

    
10572
  def Exec(self, feedback_fn):
10573
    """Execute the test delay opcode, with the wanted repetitions.
10574

10575
    """
10576
    if self.op.repeat == 0:
10577
      self._TestDelay()
10578
    else:
10579
      top_value = self.op.repeat - 1
10580
      for i in range(self.op.repeat):
10581
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10582
        self._TestDelay()
10583

    
10584

    
10585
class LUTestJobqueue(NoHooksLU):
10586
  """Utility LU to test some aspects of the job queue.
10587

10588
  """
10589
  REQ_BGL = False
10590

    
10591
  # Must be lower than default timeout for WaitForJobChange to see whether it
10592
  # notices changed jobs
10593
  _CLIENT_CONNECT_TIMEOUT = 20.0
10594
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10595

    
10596
  @classmethod
10597
  def _NotifyUsingSocket(cls, cb, errcls):
10598
    """Opens a Unix socket and waits for another program to connect.
10599

10600
    @type cb: callable
10601
    @param cb: Callback to send socket name to client
10602
    @type errcls: class
10603
    @param errcls: Exception class to use for errors
10604

10605
    """
10606
    # Using a temporary directory as there's no easy way to create temporary
10607
    # sockets without writing a custom loop around tempfile.mktemp and
10608
    # socket.bind
10609
    tmpdir = tempfile.mkdtemp()
10610
    try:
10611
      tmpsock = utils.PathJoin(tmpdir, "sock")
10612

    
10613
      logging.debug("Creating temporary socket at %s", tmpsock)
10614
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10615
      try:
10616
        sock.bind(tmpsock)
10617
        sock.listen(1)
10618

    
10619
        # Send details to client
10620
        cb(tmpsock)
10621

    
10622
        # Wait for client to connect before continuing
10623
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10624
        try:
10625
          (conn, _) = sock.accept()
10626
        except socket.error, err:
10627
          raise errcls("Client didn't connect in time (%s)" % err)
10628
      finally:
10629
        sock.close()
10630
    finally:
10631
      # Remove as soon as client is connected
10632
      shutil.rmtree(tmpdir)
10633

    
10634
    # Wait for client to close
10635
    try:
10636
      try:
10637
        # pylint: disable-msg=E1101
10638
        # Instance of '_socketobject' has no ... member
10639
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10640
        conn.recv(1)
10641
      except socket.error, err:
10642
        raise errcls("Client failed to confirm notification (%s)" % err)
10643
    finally:
10644
      conn.close()
10645

    
10646
  def _SendNotification(self, test, arg, sockname):
10647
    """Sends a notification to the client.
10648

10649
    @type test: string
10650
    @param test: Test name
10651
    @param arg: Test argument (depends on test)
10652
    @type sockname: string
10653
    @param sockname: Socket path
10654

10655
    """
10656
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10657

    
10658
  def _Notify(self, prereq, test, arg):
10659
    """Notifies the client of a test.
10660

10661
    @type prereq: bool
10662
    @param prereq: Whether this is a prereq-phase test
10663
    @type test: string
10664
    @param test: Test name
10665
    @param arg: Test argument (depends on test)
10666

10667
    """
10668
    if prereq:
10669
      errcls = errors.OpPrereqError
10670
    else:
10671
      errcls = errors.OpExecError
10672

    
10673
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10674
                                                  test, arg),
10675
                                   errcls)
10676

    
10677
  def CheckArguments(self):
10678
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10679
    self.expandnames_calls = 0
10680

    
10681
  def ExpandNames(self):
10682
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10683
    if checkargs_calls < 1:
10684
      raise errors.ProgrammerError("CheckArguments was not called")
10685

    
10686
    self.expandnames_calls += 1
10687

    
10688
    if self.op.notify_waitlock:
10689
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10690

    
10691
    self.LogInfo("Expanding names")
10692

    
10693
    # Get lock on master node (just to get a lock, not for a particular reason)
10694
    self.needed_locks = {
10695
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10696
      }
10697

    
10698
  def Exec(self, feedback_fn):
10699
    if self.expandnames_calls < 1:
10700
      raise errors.ProgrammerError("ExpandNames was not called")
10701

    
10702
    if self.op.notify_exec:
10703
      self._Notify(False, constants.JQT_EXEC, None)
10704

    
10705
    self.LogInfo("Executing")
10706

    
10707
    if self.op.log_messages:
10708
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10709
      for idx, msg in enumerate(self.op.log_messages):
10710
        self.LogInfo("Sending log message %s", idx + 1)
10711
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10712
        # Report how many test messages have been sent
10713
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10714

    
10715
    if self.op.fail:
10716
      raise errors.OpExecError("Opcode failure was requested")
10717

    
10718
    return True
10719

    
10720

    
10721
class IAllocator(object):
10722
  """IAllocator framework.
10723

10724
  An IAllocator instance has four sets of attributes:
10725
    - cfg that is needed to query the cluster
10726
    - input data (all members of the _KEYS class attribute are required)
10727
    - four buffer attributes (in|out_data|text), that represent the
10728
      input (to the external script) in text and data structure format,
10729
      and the output from it, again in two formats
10730
    - the result variables from the script (success, info, nodes) for
10731
      easy usage
10732

10733
  """
10734
  # pylint: disable-msg=R0902
10735
  # lots of instance attributes
10736
  _ALLO_KEYS = [
10737
    "name", "mem_size", "disks", "disk_template",
10738
    "os", "tags", "nics", "vcpus", "hypervisor",
10739
    ]
10740
  _RELO_KEYS = [
10741
    "name", "relocate_from",
10742
    ]
10743
  _EVAC_KEYS = [
10744
    "evac_nodes",
10745
    ]
10746

    
10747
  def __init__(self, cfg, rpc, mode, **kwargs):
10748
    self.cfg = cfg
10749
    self.rpc = rpc
10750
    # init buffer variables
10751
    self.in_text = self.out_text = self.in_data = self.out_data = None
10752
    # init all input fields so that pylint is happy
10753
    self.mode = mode
10754
    self.mem_size = self.disks = self.disk_template = None
10755
    self.os = self.tags = self.nics = self.vcpus = None
10756
    self.hypervisor = None
10757
    self.relocate_from = None
10758
    self.name = None
10759
    self.evac_nodes = None
10760
    # computed fields
10761
    self.required_nodes = None
10762
    # init result fields
10763
    self.success = self.info = self.result = None
10764
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10765
      keyset = self._ALLO_KEYS
10766
      fn = self._AddNewInstance
10767
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10768
      keyset = self._RELO_KEYS
10769
      fn = self._AddRelocateInstance
10770
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10771
      keyset = self._EVAC_KEYS
10772
      fn = self._AddEvacuateNodes
10773
    else:
10774
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10775
                                   " IAllocator" % self.mode)
10776
    for key in kwargs:
10777
      if key not in keyset:
10778
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10779
                                     " IAllocator" % key)
10780
      setattr(self, key, kwargs[key])
10781

    
10782
    for key in keyset:
10783
      if key not in kwargs:
10784
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10785
                                     " IAllocator" % key)
10786
    self._BuildInputData(fn)
10787

    
10788
  def _ComputeClusterData(self):
10789
    """Compute the generic allocator input data.
10790

10791
    This is the data that is independent of the actual operation.
10792

10793
    """
10794
    cfg = self.cfg
10795
    cluster_info = cfg.GetClusterInfo()
10796
    # cluster data
10797
    data = {
10798
      "version": constants.IALLOCATOR_VERSION,
10799
      "cluster_name": cfg.GetClusterName(),
10800
      "cluster_tags": list(cluster_info.GetTags()),
10801
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10802
      # we don't have job IDs
10803
      }
10804
    ninfo = cfg.GetAllNodesInfo()
10805
    iinfo = cfg.GetAllInstancesInfo().values()
10806
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10807

    
10808
    # node data
10809
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
10810

    
10811
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10812
      hypervisor_name = self.hypervisor
10813
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10814
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10815
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10816
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10817

    
10818
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10819
                                        hypervisor_name)
10820
    node_iinfo = \
10821
      self.rpc.call_all_instances_info(node_list,
10822
                                       cluster_info.enabled_hypervisors)
10823

    
10824
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10825

    
10826
    config_ndata = self._ComputeBasicNodeData(ninfo)
10827
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10828
                                                 i_list, config_ndata)
10829
    assert len(data["nodes"]) == len(ninfo), \
10830
        "Incomplete node data computed"
10831

    
10832
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10833

    
10834
    self.in_data = data
10835

    
10836
  @staticmethod
10837
  def _ComputeNodeGroupData(cfg):
10838
    """Compute node groups data.
10839

10840
    """
10841
    ng = {}
10842
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10843
      ng[guuid] = {
10844
        "name": gdata.name,
10845
        "alloc_policy": gdata.alloc_policy,
10846
        }
10847
    return ng
10848

    
10849
  @staticmethod
10850
  def _ComputeBasicNodeData(node_cfg):
10851
    """Compute global node data.
10852

10853
    @rtype: dict
10854
    @return: a dict mapping node names to dicts of static (config-derived)
        node attributes
10855

10856
    """
10857
    node_results = {}
10858
    for ninfo in node_cfg.values():
10859
      # fill in static (config-based) values
10860
      pnr = {
10861
        "tags": list(ninfo.GetTags()),
10862
        "primary_ip": ninfo.primary_ip,
10863
        "secondary_ip": ninfo.secondary_ip,
10864
        "offline": ninfo.offline,
10865
        "drained": ninfo.drained,
10866
        "master_candidate": ninfo.master_candidate,
10867
        "group": ninfo.group,
10868
        "master_capable": ninfo.master_capable,
10869
        "vm_capable": ninfo.vm_capable,
10870
        }
10871

    
10872
      node_results[ninfo.name] = pnr
10873

    
10874
    return node_results
10875

    
10876
  @staticmethod
10877
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
10878
                              node_results):
10879
    """Compute global node data.
10880

10881
    @param node_results: the basic node structures as filled from the config
10882

10883
    """
10884
    # make a copy of the current dict
10885
    node_results = dict(node_results)
10886
    for nname, nresult in node_data.items():
10887
      assert nname in node_results, "Missing basic data for node %s" % nname
10888
      ninfo = node_cfg[nname]
10889

    
10890
      if not (ninfo.offline or ninfo.drained):
10891
        nresult.Raise("Can't get data for node %s" % nname)
10892
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10893
                                nname)
10894
        remote_info = nresult.payload
10895

    
10896
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10897
                     'vg_size', 'vg_free', 'cpu_total']:
10898
          if attr not in remote_info:
10899
            raise errors.OpExecError("Node '%s' didn't return attribute"
10900
                                     " '%s'" % (nname, attr))
10901
          if not isinstance(remote_info[attr], int):
10902
            raise errors.OpExecError("Node '%s' returned invalid value"
10903
                                     " for '%s': %s" %
10904
                                     (nname, attr, remote_info[attr]))
10905
        # compute memory used by primary instances
10906
        i_p_mem = i_p_up_mem = 0
10907
        for iinfo, beinfo in i_list:
10908
          if iinfo.primary_node == nname:
10909
            i_p_mem += beinfo[constants.BE_MEMORY]
10910
            if iinfo.name not in node_iinfo[nname].payload:
10911
              i_used_mem = 0
10912
            else:
10913
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10914
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10915
            remote_info['memory_free'] -= max(0, i_mem_diff)
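            # Worked example (hypothetical numbers): with BE_MEMORY=1024 and
            # the hypervisor reporting 512 MiB in use, i_mem_diff is 512, so
            # 'memory_free' is reduced by 512 to account for the memory the
            # instance could still claim.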
10916

    
10917
            if iinfo.admin_up:
10918
              i_p_up_mem += beinfo[constants.BE_MEMORY]
10919

    
10920
        # compute memory used by instances
10921
        pnr_dyn = {
10922
          "total_memory": remote_info['memory_total'],
10923
          "reserved_memory": remote_info['memory_dom0'],
10924
          "free_memory": remote_info['memory_free'],
10925
          "total_disk": remote_info['vg_size'],
10926
          "free_disk": remote_info['vg_free'],
10927
          "total_cpus": remote_info['cpu_total'],
10928
          "i_pri_memory": i_p_mem,
10929
          "i_pri_up_memory": i_p_up_mem,
10930
          }
10931
        pnr_dyn.update(node_results[nname])
10932

    
10933
      node_results[nname] = pnr_dyn
10934

    
10935
    return node_results
10936

    
10937
  @staticmethod
10938
  def _ComputeInstanceData(cluster_info, i_list):
10939
    """Compute global instance data.
10940

10941
    """
10942
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
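    # The request describes the instance to be allocated; illustrative sketch
    # only (values are hypothetical, keys match the dict built below):
    #   {"name": "inst1.example.com", "disk_template": "drbd", "tags": [],
    #    "os": "debootstrap", "vcpus": 1, "memory": 512, "disks": [...],
    #    "disk_space_total": 2176, "nics": [...], "required_nodes": 2}
    # _BuildInputData later adds the "type" key carrying the allocator mode.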
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
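    # Illustrative sketch of the relocation request built below (values are
    # hypothetical):
    #   {"name": "inst1.example.com", "disk_space_total": 2176,
    #    "required_nodes": 1, "relocate_from": ["node2.example.com"]}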
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
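    # Merge the cluster-wide data gathered by _ComputeClusterData with the
    # mode-specific request returned by fn() (typically one of the _Add*
    # helpers above), tag it with the request "type" and serialize the whole
    # structure into in_text for the iallocator script.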
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
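    # Unless a call_fn is injected (e.g. for testing), the named allocator is
    # executed on the master node via the iallocator runner RPC; the raw
    # output is stored in out_text and optionally validated below.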
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
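    # A minimal well-formed reply looks like (illustrative example only):
    #   {"success": true, "info": "ok", "result": ["node1"]}
    # "success", "info" and "result" are required and "result" must be a list.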
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
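    # Three modes are accepted: allocation (needs the full instance
    # specification), relocation (needs the name of an existing instance) and
    # multi-evacuation (needs the list of nodes to evacuate). Independently,
    # the direction must be IALLOCATOR_DIR_IN (only build the allocator input)
    # or IALLOCATOR_DIR_OUT (actually run the named allocator).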
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
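    # With IALLOCATOR_DIR_IN the generated input text is returned as-is;
    # with IALLOCATOR_DIR_OUT the configured allocator is actually run and
    # its raw, unvalidated output is returned.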
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
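  # Example: _GetQueryImplementation(constants.QR_NODE) returns the _NodeQuery
  # class, while an unknown resource name raises OpPrereqError.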
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)