#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
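  # Editor's illustrative sketch, not part of the upstream module: assuming an
  # LU has built two opcodes.OpCode instances op1 and op2, its Exec method
  # could return ResultWithJobs([[op1], [op2]], summary="follow-up submitted"),
  # i.e. one inner list per job to submit, with any extra keyword values
  # ending up in the "other" dict and thus in the opcode result.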


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level, omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level, use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
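# Editor's illustrative sketch, not part of the upstream module: with the
# defaults use_default=True and use_none=False,
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
# yields {"b": 2, "c": 3}: "a" is reset (removed), "b" is kept and "c" is added.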


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
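# Editor's illustrative sketch, not part of the upstream module: with
# candidate_pool_size=10 and GetMasterCandidateStats reporting mc_now=3 and
# mc_should=3, the prospective new node raises the target to min(3 + 1, 10) = 4,
# so the function returns True (3 < 4) and the node should promote itself.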


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
1611
        test = instance in n_img.instances
1612
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1613
                 "instance should not run on node %s", node)
1614

    
1615
    diskdata = [(nname, success, status, idx)
1616
                for (nname, disks) in diskstatus.items()
1617
                for idx, (success, status) in enumerate(disks)]
1618

    
1619
    for nname, success, bdev_status, idx in diskdata:
1620
      # the 'ghost node' construction in Exec() ensures that we have a
1621
      # node here
1622
      snode = node_image[nname]
1623
      bad_snode = snode.ghost or snode.offline
1624
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1625
               self.EINSTANCEFAULTYDISK, instance,
1626
               "couldn't retrieve status for disk/%s on %s: %s",
1627
               idx, nname, bdev_status)
1628
      _ErrorIf((instanceconfig.admin_up and success and
1629
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1630
               self.EINSTANCEFAULTYDISK, instance,
1631
               "disk/%s on %s is faulty", idx, nname)
1632

    
1633
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1634
    """Verify if there are any unknown volumes in the cluster.
1635

1636
    The .os, .swap and backup volumes are ignored. All other volumes are
1637
    reported as unknown.
1638

1639
    @type reserved: L{ganeti.utils.FieldSet}
1640
    @param reserved: a FieldSet of reserved volume names
1641

1642
    """
1643
    for node, n_img in node_image.items():
1644
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1645
        # skip non-healthy nodes
1646
        continue
1647
      for volume in n_img.volumes:
1648
        test = ((node not in node_vol_should or
1649
                volume not in node_vol_should[node]) and
1650
                not reserved.Matches(volume))
1651
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1652
                      "volume %s is unknown", volume)
1653

    
1654
  def _VerifyOrphanInstances(self, instancelist, node_image):
1655
    """Verify the list of running instances.
1656

1657
    This checks what instances are running but unknown to the cluster.
1658

1659
    """
1660
    for node, n_img in node_image.items():
1661
      for o_inst in n_img.instances:
1662
        test = o_inst not in instancelist
1663
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1664
                      "instance %s on node %s should not exist", o_inst, node)
1665

    
1666
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1667
    """Verify N+1 Memory Resilience.
1668

1669
    Check that if one single node dies we can still start all the
1670
    instances it was primary for.
1671

1672
    """
1673
    cluster_info = self.cfg.GetClusterInfo()
1674
    for node, n_img in node_image.items():
1675
      # This code checks that every node which is now listed as
1676
      # secondary has enough memory to host all instances it is
1677
      # supposed to should a single other node in the cluster fail.
1678
      # FIXME: not ready for failover to an arbitrary node
1679
      # FIXME: does not support file-backed instances
1680
      # WARNING: we currently take into account down instances as well
1681
      # as up ones, considering that even if they're down someone
1682
      # might want to start them even in the event of a node failure.
1683
      if n_img.offline:
1684
        # we're skipping offline nodes from the N+1 warning, since
1685
        # most likely we don't have good memory infromation from them;
1686
        # we already list instances living on such nodes, and that's
1687
        # enough warning
1688
        continue
1689
      for prinode, instances in n_img.sbp.items():
1690
        needed_mem = 0
1691
        for instance in instances:
1692
          bep = cluster_info.FillBE(instance_cfg[instance])
1693
          if bep[constants.BE_AUTO_BALANCE]:
1694
            needed_mem += bep[constants.BE_MEMORY]
1695
        test = n_img.mfree < needed_mem
1696
        self._ErrorIf(test, self.ENODEN1, node,
1697
                      "not enough memory to accomodate instance failovers"
1698
                      " should node %s fail", prinode)
1699

    
1700
  @classmethod
1701
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1702
                   (files_all, files_all_opt, files_mc, files_vm)):
1703
    """Verifies file checksums collected from all nodes.
1704

1705
    @param errorif: Callback for reporting errors
1706
    @param nodeinfo: List of L{objects.Node} objects
1707
    @param master_node: Name of master node
1708
    @param all_nvinfo: RPC results
1709

1710
    """
1711
    node_names = frozenset(node.name for node in nodeinfo)
1712

    
1713
    assert master_node in node_names
1714
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1715
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1716
           "Found file listed in more than one file list"
1717

    
1718
    # Define functions determining which nodes to consider for a file
1719
    file2nodefn = dict([(filename, fn)
1720
      for (files, fn) in [(files_all, None),
1721
                          (files_all_opt, None),
1722
                          (files_mc, lambda node: (node.master_candidate or
1723
                                                   node.name == master_node)),
1724
                          (files_vm, lambda node: node.vm_capable)]
1725
      for filename in files])
1726

    
1727
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1728

    
1729
    for node in nodeinfo:
1730
      nresult = all_nvinfo[node.name]
1731

    
1732
      if nresult.fail_msg or not nresult.payload:
1733
        node_files = None
1734
      else:
1735
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
1736

    
1737
      test = not (node_files and isinstance(node_files, dict))
1738
      errorif(test, cls.ENODEFILECHECK, node.name,
1739
              "Node did not return file checksum data")
1740
      if test:
1741
        continue
1742

    
1743
      for (filename, checksum) in node_files.items():
1744
        # Check if the file should be considered for a node
1745
        fn = file2nodefn[filename]
1746
        if fn is None or fn(node):
1747
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
1748

    
1749
    for (filename, checksums) in fileinfo.items():
1750
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1751

    
1752
      # Nodes having the file
1753
      with_file = frozenset(node_name
1754
                            for nodes in fileinfo[filename].values()
1755
                            for node_name in nodes)
1756

    
1757
      # Nodes missing file
1758
      missing_file = node_names - with_file
1759

    
1760
      if filename in files_all_opt:
1761
        # All or no nodes
1762
        errorif(missing_file and missing_file != node_names,
1763
                cls.ECLUSTERFILECHECK, None,
1764
                "File %s is optional, but it must exist on all or no nodes (not"
1765
                " found on %s)",
1766
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1767
      else:
1768
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1769
                "File %s is missing from node(s) %s", filename,
1770
                utils.CommaJoin(utils.NiceSort(missing_file)))
1771

    
1772
      # See if there are multiple versions of the file
1773
      test = len(checksums) > 1
1774
      if test:
1775
        variants = ["variant %s on %s" %
1776
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1777
                    for (idx, (checksum, nodes)) in
1778
                      enumerate(sorted(checksums.items()))]
1779
      else:
1780
        variants = []
1781

    
1782
      errorif(test, cls.ECLUSTERFILECHECK, None,
1783
              "File %s found with %s different checksums (%s)",
1784
              filename, len(checksums), "; ".join(variants))
1785

    
1786
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1787
                      drbd_map):
1788
    """Verifies and the node DRBD status.
1789

1790
    @type ninfo: L{objects.Node}
1791
    @param ninfo: the node to check
1792
    @param nresult: the remote results for the node
1793
    @param instanceinfo: the dict of instances
1794
    @param drbd_helper: the configured DRBD usermode helper
1795
    @param drbd_map: the DRBD map as returned by
1796
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1797

1798
    """
1799
    node = ninfo.name
1800
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1801

    
1802
    if drbd_helper:
1803
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1804
      test = (helper_result == None)
1805
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1806
               "no drbd usermode helper returned")
1807
      if helper_result:
1808
        status, payload = helper_result
1809
        test = not status
1810
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1811
                 "drbd usermode helper check unsuccessful: %s", payload)
1812
        test = status and (payload != drbd_helper)
1813
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1814
                 "wrong drbd usermode helper: %s", payload)
1815

    
1816
    # compute the DRBD minors
1817
    node_drbd = {}
1818
    for minor, instance in drbd_map[node].items():
1819
      test = instance not in instanceinfo
1820
      _ErrorIf(test, self.ECLUSTERCFG, None,
1821
               "ghost instance '%s' in temporary DRBD map", instance)
1822
        # ghost instance should not be running, but otherwise we
1823
        # don't give double warnings (both ghost instance and
1824
        # unallocated minor in use)
1825
      if test:
1826
        node_drbd[minor] = (instance, False)
1827
      else:
1828
        instance = instanceinfo[instance]
1829
        node_drbd[minor] = (instance.name, instance.admin_up)
1830

    
1831
    # and now check them
1832
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1833
    test = not isinstance(used_minors, (tuple, list))
1834
    _ErrorIf(test, self.ENODEDRBD, node,
1835
             "cannot parse drbd status file: %s", str(used_minors))
1836
    if test:
1837
      # we cannot check drbd status
1838
      return
1839

    
1840
    for minor, (iname, must_exist) in node_drbd.items():
1841
      test = minor not in used_minors and must_exist
1842
      _ErrorIf(test, self.ENODEDRBD, node,
1843
               "drbd minor %d of instance %s is not active", minor, iname)
1844
    for minor in used_minors:
1845
      test = minor not in node_drbd
1846
      _ErrorIf(test, self.ENODEDRBD, node,
1847
               "unallocated drbd minor %d is in use", minor)
1848

    
1849
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1850
    """Builds the node OS structures.
1851

1852
    @type ninfo: L{objects.Node}
1853
    @param ninfo: the node to check
1854
    @param nresult: the remote results for the node
1855
    @param nimg: the node image object
1856

1857
    """
1858
    node = ninfo.name
1859
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1860

    
1861
    remote_os = nresult.get(constants.NV_OSLIST, None)
1862
    test = (not isinstance(remote_os, list) or
1863
            not compat.all(isinstance(v, list) and len(v) == 7
1864
                           for v in remote_os))
1865

    
1866
    _ErrorIf(test, self.ENODEOS, node,
1867
             "node hasn't returned valid OS data")
1868

    
1869
    nimg.os_fail = test
1870

    
1871
    if test:
1872
      return
1873

    
1874
    os_dict = {}
1875

    
1876
    for (name, os_path, status, diagnose,
1877
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1878

    
1879
      if name not in os_dict:
1880
        os_dict[name] = []
1881

    
1882
      # parameters is a list of lists instead of list of tuples due to
1883
      # JSON lacking a real tuple type, fix it:
1884
      parameters = [tuple(v) for v in parameters]
1885
      os_dict[name].append((os_path, status, diagnose,
1886
                            set(variants), set(parameters), set(api_ver)))
1887

    
1888
    nimg.oslist = os_dict
1889

    
1890
  def _VerifyNodeOS(self, ninfo, nimg, base):
1891
    """Verifies the node OS list.
1892

1893
    @type ninfo: L{objects.Node}
1894
    @param ninfo: the node to check
1895
    @param nimg: the node image object
1896
    @param base: the 'template' node we match against (e.g. from the master)
1897

1898
    """
1899
    node = ninfo.name
1900
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1901

    
1902
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1903

    
1904
    for os_name, os_data in nimg.oslist.items():
1905
      assert os_data, "Empty OS status for OS %s?!" % os_name
1906
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1907
      _ErrorIf(not f_status, self.ENODEOS, node,
1908
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1909
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1910
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1911
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1912
      # this will catched in backend too
1913
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1914
               and not f_var, self.ENODEOS, node,
1915
               "OS %s with API at least %d does not declare any variant",
1916
               os_name, constants.OS_API_V15)
1917
      # comparisons with the 'base' image
1918
      test = os_name not in base.oslist
1919
      _ErrorIf(test, self.ENODEOS, node,
1920
               "Extra OS %s not present on reference node (%s)",
1921
               os_name, base.name)
1922
      if test:
1923
        continue
1924
      assert base.oslist[os_name], "Base node has empty OS status?"
1925
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1926
      if not b_status:
1927
        # base OS is invalid, skipping
1928
        continue
1929
      for kind, a, b in [("API version", f_api, b_api),
1930
                         ("variants list", f_var, b_var),
1931
                         ("parameters", f_param, b_param)]:
1932
        _ErrorIf(a != b, self.ENODEOS, node,
1933
                 "OS %s %s differs from reference node %s: %s vs. %s",
1934
                 kind, os_name, base.name,
1935
                 utils.CommaJoin(a), utils.CommaJoin(b))
1936

    
1937
    # check any missing OSes
1938
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1939
    _ErrorIf(missing, self.ENODEOS, node,
1940
             "OSes present on reference node %s but missing on this node: %s",
1941
             base.name, utils.CommaJoin(missing))
1942

    
1943
  def _VerifyOob(self, ninfo, nresult):
1944
    """Verifies out of band functionality of a node.
1945

1946
    @type ninfo: L{objects.Node}
1947
    @param ninfo: the node to check
1948
    @param nresult: the remote results for the node
1949

1950
    """
1951
    node = ninfo.name
1952
    # We just have to verify the paths on master and/or master candidates
1953
    # as the oob helper is invoked on the master
1954
    if ((ninfo.master_candidate or ninfo.master_capable) and
1955
        constants.NV_OOB_PATHS in nresult):
1956
      for path_result in nresult[constants.NV_OOB_PATHS]:
1957
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1958

    
1959
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1960
    """Verifies and updates the node volume data.
1961

1962
    This function will update a L{NodeImage}'s internal structures
1963
    with data from the remote call.
1964

1965
    @type ninfo: L{objects.Node}
1966
    @param ninfo: the node to check
1967
    @param nresult: the remote results for the node
1968
    @param nimg: the node image object
1969
    @param vg_name: the configured VG name
1970

1971
    """
1972
    node = ninfo.name
1973
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1974

    
1975
    nimg.lvm_fail = True
1976
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1977
    if vg_name is None:
1978
      pass
1979
    elif isinstance(lvdata, basestring):
1980
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1981
               utils.SafeEncode(lvdata))
1982
    elif not isinstance(lvdata, dict):
1983
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1984
    else:
1985
      nimg.volumes = lvdata
1986
      nimg.lvm_fail = False
1987

    
1988
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1989
    """Verifies and updates the node instance list.
1990

1991
    If the listing was successful, then updates this node's instance
1992
    list. Otherwise, it marks the RPC call as failed for the instance
1993
    list key.
1994

1995
    @type ninfo: L{objects.Node}
1996
    @param ninfo: the node to check
1997
    @param nresult: the remote results for the node
1998
    @param nimg: the node image object
1999

2000
    """
2001
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2002
    test = not isinstance(idata, list)
2003
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2004
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2005
    if test:
2006
      nimg.hyp_fail = True
2007
    else:
2008
      nimg.instances = idata
2009

    
2010
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2011
    """Verifies and computes a node information map
2012

2013
    @type ninfo: L{objects.Node}
2014
    @param ninfo: the node to check
2015
    @param nresult: the remote results for the node
2016
    @param nimg: the node image object
2017
    @param vg_name: the configured VG name
2018

2019
    """
2020
    node = ninfo.name
2021
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2022

    
2023
    # try to read free memory (from the hypervisor)
2024
    hv_info = nresult.get(constants.NV_HVINFO, None)
2025
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2026
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2027
    if not test:
2028
      try:
2029
        nimg.mfree = int(hv_info["memory_free"])
2030
      except (ValueError, TypeError):
2031
        _ErrorIf(True, self.ENODERPC, node,
2032
                 "node returned invalid nodeinfo, check hypervisor")
2033

    
2034
    # FIXME: devise a free space model for file based instances as well
2035
    if vg_name is not None:
2036
      test = (constants.NV_VGLIST not in nresult or
2037
              vg_name not in nresult[constants.NV_VGLIST])
2038
      _ErrorIf(test, self.ENODELVM, node,
2039
               "node didn't return data for the volume group '%s'"
2040
               " - it is either missing or broken", vg_name)
2041
      if not test:
2042
        try:
2043
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2044
        except (ValueError, TypeError):
2045
          _ErrorIf(True, self.ENODERPC, node,
2046
                   "node returned invalid LVM info, check LVM status")
2047

    
2048
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2049
    """Gets per-disk status information for all instances.
2050

2051
    @type nodelist: list of strings
2052
    @param nodelist: Node names
2053
    @type node_image: dict of (name, L{objects.Node})
2054
    @param node_image: Node objects
2055
    @type instanceinfo: dict of (name, L{objects.Instance})
2056
    @param instanceinfo: Instance objects
2057
    @rtype: {instance: {node: [(succes, payload)]}}
2058
    @return: a dictionary of per-instance dictionaries with nodes as
2059
        keys and disk information as values; the disk information is a
2060
        list of tuples (success, payload)
2061

2062
    """
2063
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2064

    
2065
    node_disks = {}
2066
    node_disks_devonly = {}
2067
    diskless_instances = set()
2068
    diskless = constants.DT_DISKLESS
2069

    
2070
    for nname in nodelist:
2071
      node_instances = list(itertools.chain(node_image[nname].pinst,
2072
                                            node_image[nname].sinst))
2073
      diskless_instances.update(inst for inst in node_instances
2074
                                if instanceinfo[inst].disk_template == diskless)
2075
      disks = [(inst, disk)
2076
               for inst in node_instances
2077
               for disk in instanceinfo[inst].disks]
2078

    
2079
      if not disks:
2080
        # No need to collect data
2081
        continue
2082

    
2083
      node_disks[nname] = disks
2084

    
2085
      # Creating copies as SetDiskID below will modify the objects and that can
2086
      # lead to incorrect data returned from nodes
2087
      devonly = [dev.Copy() for (_, dev) in disks]
2088

    
2089
      for dev in devonly:
2090
        self.cfg.SetDiskID(dev, nname)
2091

    
2092
      node_disks_devonly[nname] = devonly
2093

    
2094
    assert len(node_disks) == len(node_disks_devonly)
2095

    
2096
    # Collect data from all nodes with disks
2097
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2098
                                                          node_disks_devonly)
2099

    
2100
    assert len(result) == len(node_disks)
2101

    
2102
    instdisk = {}
2103

    
2104
    for (nname, nres) in result.items():
2105
      disks = node_disks[nname]
2106

    
2107
      if nres.offline:
2108
        # No data from this node
2109
        data = len(disks) * [(False, "node offline")]
2110
      else:
2111
        msg = nres.fail_msg
2112
        _ErrorIf(msg, self.ENODERPC, nname,
2113
                 "while getting disk information: %s", msg)
2114
        if msg:
2115
          # No data from this node
2116
          data = len(disks) * [(False, msg)]
2117
        else:
2118
          data = []
2119
          for idx, i in enumerate(nres.payload):
2120
            if isinstance(i, (tuple, list)) and len(i) == 2:
2121
              data.append(i)
2122
            else:
2123
              logging.warning("Invalid result from node %s, entry %d: %s",
2124
                              nname, idx, i)
2125
              data.append((False, "Invalid result from the remote node"))
2126

    
2127
      for ((inst, _), status) in zip(disks, data):
2128
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2129

    
2130
    # Add empty entries for diskless instances.
2131
    for inst in diskless_instances:
2132
      assert inst not in instdisk
2133
      instdisk[inst] = {}
2134

    
2135
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2136
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2137
                      compat.all(isinstance(s, (tuple, list)) and
2138
                                 len(s) == 2 for s in statuses)
2139
                      for inst, nnames in instdisk.items()
2140
                      for nname, statuses in nnames.items())
2141
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2142

    
2143
    return instdisk
2144

    
2145
  def _VerifyHVP(self, hvp_data):
2146
    """Verifies locally the syntax of the hypervisor parameters.
2147

2148
    """
2149
    for item, hv_name, hv_params in hvp_data:
2150
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2151
             (item, hv_name))
2152
      try:
2153
        hv_class = hypervisor.GetHypervisor(hv_name)
2154
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2155
        hv_class.CheckParameterSyntax(hv_params)
2156
      except errors.GenericError, err:
2157
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2158

    
2159
  def BuildHooksEnv(self):
2160
    """Build hooks env.
2161

2162
    Cluster-Verify hooks just ran in the post phase and their failure makes
2163
    the output be logged in the verify output and the verification to fail.
2164

2165
    """
2166
    cfg = self.cfg
2167

    
2168
    env = {
2169
      "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2170
      }
2171

    
2172
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2173
               for node in cfg.GetAllNodesInfo().values())
2174

    
2175
    return env
2176

    
2177
  def BuildHooksNodes(self):
2178
    """Build hooks nodes.
2179

2180
    """
2181
    return ([], self.cfg.GetNodeList())
2182

    
2183
  def Exec(self, feedback_fn):
2184
    """Verify integrity of cluster, performing various test on nodes.
2185

2186
    """
2187
    # This method has too many local variables. pylint: disable-msg=R0914
2188
    self.bad = False
2189
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2190
    verbose = self.op.verbose
2191
    self._feedback_fn = feedback_fn
2192
    feedback_fn("* Verifying global settings")
2193
    for msg in self.cfg.VerifyConfig():
2194
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2195

    
2196
    # Check the cluster certificates
2197
    for cert_filename in constants.ALL_CERT_FILES:
2198
      (errcode, msg) = _VerifyCertificate(cert_filename)
2199
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2200

    
2201
    vg_name = self.cfg.GetVGName()
2202
    drbd_helper = self.cfg.GetDRBDHelper()
2203
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2204
    cluster = self.cfg.GetClusterInfo()
2205
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2206
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2207
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2208
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2209
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2210
                        for iname in instancelist)
2211
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2212
    i_non_redundant = [] # Non redundant instances
2213
    i_non_a_balanced = [] # Non auto-balanced instances
2214
    n_offline = 0 # Count of offline nodes
2215
    n_drained = 0 # Count of nodes being drained
2216
    node_vol_should = {}
2217

    
2218
    # FIXME: verify OS list
2219

    
2220
    # File verification
2221
    filemap = _ComputeAncillaryFiles(cluster, False)
2222

    
2223
    # do local checksums
2224
    master_node = self.master_node = self.cfg.GetMasterNode()
2225
    master_ip = self.cfg.GetMasterIP()
2226

    
2227
    # Compute the set of hypervisor parameters
2228
    hvp_data = []
2229
    for hv_name in hypervisors:
2230
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2231
    for os_name, os_hvp in cluster.os_hvp.items():
2232
      for hv_name, hv_params in os_hvp.items():
2233
        if not hv_params:
2234
          continue
2235
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2236
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
2237
    # TODO: collapse identical parameter values in a single one
2238
    for instance in instanceinfo.values():
2239
      if not instance.hvparams:
2240
        continue
2241
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2242
                       cluster.FillHV(instance)))
2243
    # and verify them locally
2244
    self._VerifyHVP(hvp_data)
2245

    
2246
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2247
    node_verify_param = {
2248
      constants.NV_FILELIST:
2249
        utils.UniqueSequence(filename
2250
                             for files in filemap
2251
                             for filename in files),
2252
      constants.NV_NODELIST: [node.name for node in nodeinfo
2253
                              if not node.offline],
2254
      constants.NV_HYPERVISOR: hypervisors,
2255
      constants.NV_HVPARAMS: hvp_data,
2256
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2257
                                  node.secondary_ip) for node in nodeinfo
2258
                                 if not node.offline],
2259
      constants.NV_INSTANCELIST: hypervisors,
2260
      constants.NV_VERSION: None,
2261
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2262
      constants.NV_NODESETUP: None,
2263
      constants.NV_TIME: None,
2264
      constants.NV_MASTERIP: (master_node, master_ip),
2265
      constants.NV_OSLIST: None,
2266
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2267
      }
2268

    
2269
    if vg_name is not None:
2270
      node_verify_param[constants.NV_VGLIST] = None
2271
      node_verify_param[constants.NV_LVLIST] = vg_name
2272
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2273
      node_verify_param[constants.NV_DRBDLIST] = None
2274

    
2275
    if drbd_helper:
2276
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2277

    
2278
    # Build our expected cluster state
2279
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2280
                                                 name=node.name,
2281
                                                 vm_capable=node.vm_capable))
2282
                      for node in nodeinfo)
2283

    
2284
    # Gather OOB paths
2285
    oob_paths = []
2286
    for node in nodeinfo:
2287
      path = _SupportsOob(self.cfg, node)
2288
      if path and path not in oob_paths:
2289
        oob_paths.append(path)
2290

    
2291
    if oob_paths:
2292
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2293

    
2294
    for instance in instancelist:
2295
      inst_config = instanceinfo[instance]
2296

    
2297
      for nname in inst_config.all_nodes:
2298
        if nname not in node_image:
2299
          # ghost node
2300
          gnode = self.NodeImage(name=nname)
2301
          gnode.ghost = True
2302
          node_image[nname] = gnode
2303

    
2304
      inst_config.MapLVsByNode(node_vol_should)
2305

    
2306
      pnode = inst_config.primary_node
2307
      node_image[pnode].pinst.append(instance)
2308

    
2309
      for snode in inst_config.secondary_nodes:
2310
        nimg = node_image[snode]
2311
        nimg.sinst.append(instance)
2312
        if pnode not in nimg.sbp:
2313
          nimg.sbp[pnode] = []
2314
        nimg.sbp[pnode].append(instance)
2315

    
2316
    # At this point, we have the in-memory data structures complete,
2317
    # except for the runtime information, which we'll gather next
2318

    
2319
    # Due to the way our RPC system works, exact response times cannot be
2320
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2321
    # time before and after executing the request, we can at least have a time
2322
    # window.
2323
    nvinfo_starttime = time.time()
2324
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2325
                                           self.cfg.GetClusterName())
2326
    nvinfo_endtime = time.time()
2327

    
2328
    all_drbd_map = self.cfg.ComputeDRBDMap()
2329

    
2330
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2331
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2332

    
2333
    feedback_fn("* Verifying configuration file consistency")
2334
    self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2335

    
2336
    feedback_fn("* Verifying node status")
2337

    
2338
    refos_img = None
2339

    
2340
    for node_i in nodeinfo:
2341
      node = node_i.name
2342
      nimg = node_image[node]
2343

    
2344
      if node_i.offline:
2345
        if verbose:
2346
          feedback_fn("* Skipping offline node %s" % (node,))
2347
        n_offline += 1
2348
        continue
2349

    
2350
      if node == master_node:
2351
        ntype = "master"
2352
      elif node_i.master_candidate:
2353
        ntype = "master candidate"
2354
      elif node_i.drained:
2355
        ntype = "drained"
2356
        n_drained += 1
2357
      else:
2358
        ntype = "regular"
2359
      if verbose:
2360
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2361

    
2362
      msg = all_nvinfo[node].fail_msg
2363
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2364
      if msg:
2365
        nimg.rpc_fail = True
2366
        continue
2367

    
2368
      nresult = all_nvinfo[node].payload
2369

    
2370
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2371
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2372
      self._VerifyNodeNetwork(node_i, nresult)
2373
      self._VerifyOob(node_i, nresult)
2374

    
2375
      if nimg.vm_capable:
2376
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2377
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2378
                             all_drbd_map)
2379

    
2380
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2381
        self._UpdateNodeInstances(node_i, nresult, nimg)
2382
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2383
        self._UpdateNodeOS(node_i, nresult, nimg)
2384
        if not nimg.os_fail:
2385
          if refos_img is None:
2386
            refos_img = nimg
2387
          self._VerifyNodeOS(node_i, nimg, refos_img)
2388

    
2389
    feedback_fn("* Verifying instance status")
2390
    for instance in instancelist:
2391
      if verbose:
2392
        feedback_fn("* Verifying instance %s" % instance)
2393
      inst_config = instanceinfo[instance]
2394
      self._VerifyInstance(instance, inst_config, node_image,
2395
                           instdisk[instance])
2396
      inst_nodes_offline = []
2397

    
2398
      pnode = inst_config.primary_node
2399
      pnode_img = node_image[pnode]
2400
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2401
               self.ENODERPC, pnode, "instance %s, connection to"
2402
               " primary node failed", instance)
2403

    
2404
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2405
               self.EINSTANCEBADNODE, instance,
2406
               "instance is marked as running and lives on offline node %s",
2407
               inst_config.primary_node)
2408

    
2409
      # If the instance is non-redundant we cannot survive losing its primary
2410
      # node, so we are not N+1 compliant. On the other hand we have no disk
2411
      # templates with more than one secondary so that situation is not well
2412
      # supported either.
2413
      # FIXME: does not support file-backed instances
2414
      if not inst_config.secondary_nodes:
2415
        i_non_redundant.append(instance)
2416

    
2417
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2418
               instance, "instance has multiple secondary nodes: %s",
2419
               utils.CommaJoin(inst_config.secondary_nodes),
2420
               code=self.ETYPE_WARNING)
2421

    
2422
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2423
        pnode = inst_config.primary_node
2424
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2425
        instance_groups = {}
2426

    
2427
        for node in instance_nodes:
2428
          instance_groups.setdefault(nodeinfo_byname[node].group,
2429
                                     []).append(node)
2430

    
2431
        pretty_list = [
2432
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2433
          # Sort so that we always list the primary node first.
2434
          for group, nodes in sorted(instance_groups.items(),
2435
                                     key=lambda (_, nodes): pnode in nodes,
2436
                                     reverse=True)]
2437

    
2438
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2439
                      instance, "instance has primary and secondary nodes in"
2440
                      " different groups: %s", utils.CommaJoin(pretty_list),
2441
                      code=self.ETYPE_WARNING)
2442

    
2443
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2444
        i_non_a_balanced.append(instance)
2445

    
2446
      for snode in inst_config.secondary_nodes:
2447
        s_img = node_image[snode]
2448
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2449
                 "instance %s, connection to secondary node failed", instance)
2450

    
2451
        if s_img.offline:
2452
          inst_nodes_offline.append(snode)
2453

    
2454
      # warn that the instance lives on offline nodes
2455
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2456
               "instance has offline secondary node(s) %s",
2457
               utils.CommaJoin(inst_nodes_offline))
2458
      # ... or ghost/non-vm_capable nodes
2459
      for node in inst_config.all_nodes:
2460
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2461
                 "instance lives on ghost node %s", node)
2462
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2463
                 instance, "instance lives on non-vm_capable node %s", node)
2464

    
2465
    feedback_fn("* Verifying orphan volumes")
2466
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2467
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2468

    
2469
    feedback_fn("* Verifying orphan instances")
2470
    self._VerifyOrphanInstances(instancelist, node_image)
2471

    
2472
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2473
      feedback_fn("* Verifying N+1 Memory redundancy")
2474
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2475

    
2476
    feedback_fn("* Other Notes")
2477
    if i_non_redundant:
2478
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2479
                  % len(i_non_redundant))
2480

    
2481
    if i_non_a_balanced:
2482
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2483
                  % len(i_non_a_balanced))
2484

    
2485
    if n_offline:
2486
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2487

    
2488
    if n_drained:
2489
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2490

    
2491
    return not self.bad
2492

    
2493
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2494
    """Analyze the post-hooks' result
2495

2496
    This method analyses the hook result, handles it, and sends some
2497
    nicely-formatted feedback back to the user.
2498

2499
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2500
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2501
    @param hooks_results: the results of the multi-node hooks rpc call
2502
    @param feedback_fn: function used send feedback back to the caller
2503
    @param lu_result: previous Exec result
2504
    @return: the new Exec result, based on the previous result
2505
        and hook results
2506

2507
    """
2508
    # We only really run POST phase hooks, and are only interested in
2509
    # their results
2510
    if phase == constants.HOOKS_PHASE_POST:
2511
      # Used to change hooks' output to proper indentation
2512
      feedback_fn("* Hooks Results")
2513
      assert hooks_results, "invalid result from hooks"
2514

    
2515
      for node_name in hooks_results:
2516
        res = hooks_results[node_name]
2517
        msg = res.fail_msg
2518
        test = msg and not res.offline
2519
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2520
                      "Communication failure in hooks execution: %s", msg)
2521
        if res.offline or msg:
2522
          # No need to investigate payload if node is offline or gave an error.
2523
          # override manually lu_result here as _ErrorIf only
2524
          # overrides self.bad
2525
          lu_result = 1
2526
          continue
2527
        for script, hkr, output in res.payload:
2528
          test = hkr == constants.HKR_FAIL
2529
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2530
                        "Script %s failed, output:", script)
2531
          if test:
2532
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2533
            feedback_fn("%s" % output)
2534
            lu_result = 0
2535

    
2536
      return lu_result
2537

    
2538

    
2539
class LUClusterVerifyDisks(NoHooksLU):
2540
  """Verifies the cluster disks status.
2541

2542
  """
2543
  REQ_BGL = False
2544

    
2545
  def ExpandNames(self):
2546
    self.needed_locks = {
2547
      locking.LEVEL_NODE: locking.ALL_SET,
2548
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2549
    }
2550
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2551

    
2552
  def Exec(self, feedback_fn):
2553
    """Verify integrity of cluster disks.
2554

2555
    @rtype: tuple of three items
2556
    @return: a tuple of (dict of node-to-node_error, list of instances
2557
        which need activate-disks, dict of instance: (node, volume) for
2558
        missing volumes
2559

2560
    """
2561
    result = res_nodes, res_instances, res_missing = {}, [], {}
2562

    
2563
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2564
    instances = self.cfg.GetAllInstancesInfo().values()
2565

    
2566
    nv_dict = {}
2567
    for inst in instances:
2568
      inst_lvs = {}
2569
      if not inst.admin_up:
2570
        continue
2571
      inst.MapLVsByNode(inst_lvs)
2572
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2573
      for node, vol_list in inst_lvs.iteritems():
2574
        for vol in vol_list:
2575
          nv_dict[(node, vol)] = inst
2576

    
2577
    if not nv_dict:
2578
      return result
2579

    
2580
    node_lvs = self.rpc.call_lv_list(nodes, [])
2581
    for node, node_res in node_lvs.items():
2582
      if node_res.offline:
2583
        continue
2584
      msg = node_res.fail_msg
2585
      if msg:
2586
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2587
        res_nodes[node] = msg
2588
        continue
2589

    
2590
      lvs = node_res.payload
2591
      for lv_name, (_, _, lv_online) in lvs.items():
2592
        inst = nv_dict.pop((node, lv_name), None)
2593
        if (not lv_online and inst is not None
2594
            and inst.name not in res_instances):
2595
          res_instances.append(inst.name)
2596

    
2597
    # any leftover items in nv_dict are missing LVs, let's arrange the
2598
    # data better
2599
    for key, inst in nv_dict.iteritems():
2600
      if inst.name not in res_missing:
2601
        res_missing[inst.name] = []
2602
      res_missing[inst.name].append(key)
2603

    
2604
    return result
2605

    
2606

    
2607
class LUClusterRepairDiskSizes(NoHooksLU):
2608
  """Verifies the cluster disks sizes.
2609

2610
  """
2611
  REQ_BGL = False
2612

    
2613
  def ExpandNames(self):
2614
    if self.op.instances:
2615
      self.wanted_names = []
2616
      for name in self.op.instances:
2617
        full_name = _ExpandInstanceName(self.cfg, name)
2618
        self.wanted_names.append(full_name)
2619
      self.needed_locks = {
2620
        locking.LEVEL_NODE: [],
2621
        locking.LEVEL_INSTANCE: self.wanted_names,
2622
        }
2623
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2624
    else:
2625
      self.wanted_names = None
2626
      self.needed_locks = {
2627
        locking.LEVEL_NODE: locking.ALL_SET,
2628
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2629
        }
2630
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2631

    
2632
  def DeclareLocks(self, level):
2633
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2634
      self._LockInstancesNodes(primary_only=True)
2635

    
2636
  def CheckPrereq(self):
2637
    """Check prerequisites.
2638

2639
    This only checks the optional instance list against the existing names.
2640

2641
    """
2642
    if self.wanted_names is None:
2643
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2644

    
2645
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2646
                             in self.wanted_names]
2647

    
2648
  def _EnsureChildSizes(self, disk):
2649
    """Ensure children of the disk have the needed disk size.
2650

2651
    This is valid mainly for DRBD8 and fixes an issue where the
2652
    children have smaller disk size.
2653

2654
    @param disk: an L{ganeti.objects.Disk} object
2655

2656
    """
2657
    if disk.dev_type == constants.LD_DRBD8:
2658
      assert disk.children, "Empty children for DRBD8?"
2659
      fchild = disk.children[0]
2660
      mismatch = fchild.size < disk.size
2661
      if mismatch:
2662
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2663
                     fchild.size, disk.size)
2664
        fchild.size = disk.size
2665

    
2666
      # and we recurse on this child only, not on the metadev
2667
      return self._EnsureChildSizes(fchild) or mismatch
2668
    else:
2669
      return False
2670

    
2671
  def Exec(self, feedback_fn):
2672
    """Verify the size of cluster disks.
2673

2674
    """
2675
    # TODO: check child disks too
2676
    # TODO: check differences in size between primary/secondary nodes
2677
    per_node_disks = {}
2678
    for instance in self.wanted_instances:
2679
      pnode = instance.primary_node
2680
      if pnode not in per_node_disks:
2681
        per_node_disks[pnode] = []
2682
      for idx, disk in enumerate(instance.disks):
2683
        per_node_disks[pnode].append((instance, idx, disk))
2684

    
2685
    changed = []
2686
    for node, dskl in per_node_disks.items():
2687
      newl = [v[2].Copy() for v in dskl]
2688
      for dsk in newl:
2689
        self.cfg.SetDiskID(dsk, node)
2690
      result = self.rpc.call_blockdev_getsize(node, newl)
2691
      if result.fail_msg:
2692
        self.LogWarning("Failure in blockdev_getsize call to node"
2693
                        " %s, ignoring", node)
2694
        continue
2695
      if len(result.payload) != len(dskl):
2696
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2697
                        " result.payload=%s", node, len(dskl), result.payload)
2698
        self.LogWarning("Invalid result from node %s, ignoring node results",
2699
                        node)
2700
        continue
2701
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
2702
        if size is None:
2703
          self.LogWarning("Disk %d of instance %s did not return size"
2704
                          " information, ignoring", idx, instance.name)
2705
          continue
2706
        if not isinstance(size, (int, long)):
2707
          self.LogWarning("Disk %d of instance %s did not return valid"
2708
                          " size information, ignoring", idx, instance.name)
2709
          continue
2710
        size = size >> 20
2711
        if size != disk.size:
2712
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2713
                       " correcting: recorded %d, actual %d", idx,
2714
                       instance.name, disk.size, size)
2715
          disk.size = size
2716
          self.cfg.Update(instance, feedback_fn)
2717
          changed.append((instance.name, idx, size))
2718
        if self._EnsureChildSizes(disk):
2719
          self.cfg.Update(instance, feedback_fn)
2720
          changed.append((instance.name, idx, disk.size))
2721
    return changed
2722

    
2723

    
2724
class LUClusterRename(LogicalUnit):
2725
  """Rename the cluster.
2726

2727
  """
2728
  HPATH = "cluster-rename"
2729
  HTYPE = constants.HTYPE_CLUSTER
2730

    
2731
  def BuildHooksEnv(self):
2732
    """Build hooks env.
2733

2734
    """
2735
    return {
2736
      "OP_TARGET": self.cfg.GetClusterName(),
2737
      "NEW_NAME": self.op.name,
2738
      }
2739

    
2740
  def BuildHooksNodes(self):
2741
    """Build hooks nodes.
2742

2743
    """
2744
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2745

    
2746
  def CheckPrereq(self):
2747
    """Verify that the passed name is a valid one.
2748

2749
    """
2750
    hostname = netutils.GetHostname(name=self.op.name,
2751
                                    family=self.cfg.GetPrimaryIPFamily())
2752

    
2753
    new_name = hostname.name
2754
    self.ip = new_ip = hostname.ip
2755
    old_name = self.cfg.GetClusterName()
2756
    old_ip = self.cfg.GetMasterIP()
2757
    if new_name == old_name and new_ip == old_ip:
2758
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2759
                                 " cluster has changed",
2760
                                 errors.ECODE_INVAL)
2761
    if new_ip != old_ip:
2762
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2763
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2764
                                   " reachable on the network" %
2765
                                   new_ip, errors.ECODE_NOTUNIQUE)
2766

    
2767
    self.op.name = new_name
2768

    
2769
  def Exec(self, feedback_fn):
2770
    """Rename the cluster.
2771

2772
    """
2773
    clustername = self.op.name
2774
    ip = self.ip
2775

    
2776
    # shutdown the master IP
2777
    master = self.cfg.GetMasterNode()
2778
    result = self.rpc.call_node_stop_master(master, False)
2779
    result.Raise("Could not disable the master role")
2780

    
2781
    try:
2782
      cluster = self.cfg.GetClusterInfo()
2783
      cluster.cluster_name = clustername
2784
      cluster.master_ip = ip
2785
      self.cfg.Update(cluster, feedback_fn)
2786

    
2787
      # update the known hosts file
2788
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2789
      node_list = self.cfg.GetOnlineNodeList()
2790
      try:
2791
        node_list.remove(master)
2792
      except ValueError:
2793
        pass
2794
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2795
    finally:
2796
      result = self.rpc.call_node_start_master(master, False, False)
2797
      msg = result.fail_msg
2798
      if msg:
2799
        self.LogWarning("Could not re-enable the master role on"
2800
                        " the master, please restart manually: %s", msg)
2801

    
2802
    return clustername
2803

    
2804

    
2805
class LUClusterSetParams(LogicalUnit):
2806
  """Change the parameters of the cluster.
2807

2808
  """
2809
  HPATH = "cluster-modify"
2810
  HTYPE = constants.HTYPE_CLUSTER
2811
  REQ_BGL = False
2812

    
2813
  def CheckArguments(self):
2814
    """Check parameters
2815

2816
    """
2817
    if self.op.uid_pool:
2818
      uidpool.CheckUidPool(self.op.uid_pool)
2819

    
2820
    if self.op.add_uids:
2821
      uidpool.CheckUidPool(self.op.add_uids)
2822

    
2823
    if self.op.remove_uids:
2824
      uidpool.CheckUidPool(self.op.remove_uids)
2825

    
2826
  def ExpandNames(self):
2827
    # FIXME: in the future maybe other cluster params won't require checking on
2828
    # all nodes to be modified.
2829
    self.needed_locks = {
2830
      locking.LEVEL_NODE: locking.ALL_SET,
2831
    }
2832
    self.share_locks[locking.LEVEL_NODE] = 1
2833

    
2834
  def BuildHooksEnv(self):
2835
    """Build hooks env.
2836

2837
    """
2838
    return {
2839
      "OP_TARGET": self.cfg.GetClusterName(),
2840
      "NEW_VG_NAME": self.op.vg_name,
2841
      }
2842

    
2843
  def BuildHooksNodes(self):
2844
    """Build hooks nodes.
2845

2846
    """
2847
    mn = self.cfg.GetMasterNode()
2848
    return ([mn], [mn])
2849

    
2850
  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
3274
  """Sleep and poll for an instance's disk to sync.
3275

3276
  """
3277
  if not instance.disks or disks is not None and not disks:
3278
    return True
3279

    
3280
  disks = _ExpandCheckDisks(instance, disks)
3281

    
3282
  if not oneshot:
3283
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3284

    
3285
  node = instance.primary_node
3286

    
3287
  for dev in disks:
3288
    lu.cfg.SetDiskID(dev, node)
3289

    
3290
  # TODO: Convert to utils.Retry
3291

    
3292
  retries = 0
3293
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3294
  while True:
3295
    max_time = 0
3296
    done = True
3297
    cumul_degraded = False
3298
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3299
    msg = rstats.fail_msg
3300
    if msg:
3301
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3302
      retries += 1
3303
      if retries >= 10:
3304
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3305
                                 " aborting." % node)
3306
      time.sleep(6)
3307
      continue
3308
    rstats = rstats.payload
3309
    retries = 0
3310
    for i, mstat in enumerate(rstats):
3311
      if mstat is None:
3312
        lu.LogWarning("Can't compute data for node %s/%s",
3313
                           node, disks[i].iv_name)
3314
        continue
3315

    
3316
      cumul_degraded = (cumul_degraded or
3317
                        (mstat.is_degraded and mstat.sync_percent is None))
3318
      if mstat.sync_percent is not None:
3319
        done = False
3320
        if mstat.estimated_time is not None:
3321
          rem_time = ("%s remaining (estimated)" %
3322
                      utils.FormatSeconds(mstat.estimated_time))
3323
          max_time = mstat.estimated_time
3324
        else:
3325
          rem_time = "no time estimate"
3326
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3327
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3328

    
3329
    # if we're done but degraded, let's do a few small retries, to
3330
    # make sure we see a stable and not transient situation; therefore
3331
    # we force restart of the loop
3332
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3333
      logging.info("Degraded disks found, %d retries left", degr_retries)
3334
      degr_retries -= 1
3335
      time.sleep(1)
3336
      continue
3337

    
3338
    if done or oneshot:
3339
      break
3340

    
3341
    time.sleep(min(60, max_time))
3342

    
3343
  if done:
3344
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3345
  return not cumul_degraded
3346

    
3347

    
3348
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3349
  """Check that mirrors are not degraded.
3350

3351
  The ldisk parameter, if True, will change the test from the
3352
  is_degraded attribute (which represents overall non-ok status for
3353
  the device(s)) to the ldisk (representing the local storage status).
3354

3355
  """
3356
  lu.cfg.SetDiskID(dev, node)
3357

    
3358
  result = True
3359

    
3360
  if on_primary or dev.AssembleOnSecondary():
3361
    rstats = lu.rpc.call_blockdev_find(node, dev)
3362
    msg = rstats.fail_msg
3363
    if msg:
3364
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3365
      result = False
3366
    elif not rstats.payload:
3367
      lu.LogWarning("Can't find disk on node %s", node)
3368
      result = False
3369
    else:
3370
      if ldisk:
3371
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3372
      else:
3373
        result = result and not rstats.payload.is_degraded
3374

    
3375
  if dev.children:
3376
    for child in dev.children:
3377
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3378

    
3379
  return result
3380

    
3381

    
3382
class LUOobCommand(NoHooksLU):
3383
  """Logical unit for OOB handling.
3384

3385
  """
3386
  REQ_BGL = False
3387
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
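  # Power-off and power-cycle are never run against the master node through
  # this LU: CheckPrereq drops the master from the default node list and
  # rejects it when it is named explicitly.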
3388

    
3389
  def CheckPrereq(self):
3390
    """Check prerequisites.
3391

3392
    This checks:
3393
     - the node exists in the configuration
3394
     - OOB is supported
3395

3396
    Any errors are signaled by raising errors.OpPrereqError.
3397

3398
    """
3399
    self.nodes = []
3400
    self.master_node = self.cfg.GetMasterNode()
3401

    
3402
    assert self.op.power_delay >= 0.0
3403

    
3404
    if self.op.node_names:
3405
      if self.op.command in self._SKIP_MASTER:
3406
        if self.master_node in self.op.node_names:
3407
          master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3408
          master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3409

    
3410
          if master_oob_handler:
3411
            additional_text = ("Run '%s %s %s' if you want to operate on the"
3412
                               " master regardless") % (master_oob_handler,
3413
                                                        self.op.command,
3414
                                                        self.master_node)
3415
          else:
3416
            additional_text = "The master node does not support out-of-band"
3417

    
3418
          raise errors.OpPrereqError(("Operating on the master node %s is not"
3419
                                      " allowed for %s\n%s") %
3420
                                     (self.master_node, self.op.command,
3421
                                      additional_text), errors.ECODE_INVAL)
3422
    else:
3423
      self.op.node_names = self.cfg.GetNodeList()
3424
      if self.op.command in self._SKIP_MASTER:
3425
        self.op.node_names.remove(self.master_node)
3426

    
3427
    if self.op.command in self._SKIP_MASTER:
3428
      assert self.master_node not in self.op.node_names
3429

    
3430
    for node_name in self.op.node_names:
3431
      node = self.cfg.GetNodeInfo(node_name)
3432

    
3433
      if node is None:
3434
        raise errors.OpPrereqError("Node %s not found" % node_name,
3435
                                   errors.ECODE_NOENT)
3436
      else:
3437
        self.nodes.append(node)
3438

    
3439
      if (not self.op.ignore_status and
3440
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3441
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3442
                                    " not marked offline") % node_name,
3443
                                   errors.ECODE_STATE)
3444

    
3445
  def ExpandNames(self):
3446
    """Gather locks we need.
3447

3448
    """
3449
    if self.op.node_names:
3450
      self.op.node_names = [_ExpandNodeName(self.cfg, name)
3451
                            for name in self.op.node_names]
3452
      lock_names = self.op.node_names
3453
    else:
3454
      lock_names = locking.ALL_SET
3455

    
3456
    self.needed_locks = {
3457
      locking.LEVEL_NODE: lock_names,
3458
      }
3459

    
3460
  def Exec(self, feedback_fn):
3461
    """Execute OOB and return result if we expect any.
3462

3463
    """
3464
    master_node = self.master_node
3465
    ret = []
3466

    
3467
    for idx, node in enumerate(self.nodes):
3468
      node_entry = [(constants.RS_NORMAL, node.name)]
3469
      ret.append(node_entry)
3470

    
3471
      oob_program = _SupportsOob(self.cfg, node)
3472

    
3473
      if not oob_program:
3474
        node_entry.append((constants.RS_UNAVAIL, None))
3475
        continue
3476

    
3477
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3478
                   self.op.command, oob_program, node.name)
3479
      result = self.rpc.call_run_oob(master_node, oob_program,
3480
                                     self.op.command, node.name,
3481
                                     self.op.timeout)
3482

    
3483
      if result.fail_msg:
3484
        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3485
                        node.name, result.fail_msg)
3486
        node_entry.append((constants.RS_NODATA, None))
3487
      else:
3488
        try:
3489
          self._CheckPayload(result)
3490
        except errors.OpExecError, err:
3491
          self.LogWarning("The payload returned by '%s' is not valid: %s",
3492
                          node.name, err)
3493
          node_entry.append((constants.RS_NODATA, None))
3494
        else:
3495
          if self.op.command == constants.OOB_HEALTH:
3496
            # For health we should log important events
3497
            for item, status in result.payload:
3498
              if status in [constants.OOB_STATUS_WARNING,
3499
                            constants.OOB_STATUS_CRITICAL]:
3500
                self.LogWarning("On node '%s' item '%s' has status '%s'",
3501
                                node.name, item, status)
3502

    
3503
          if self.op.command == constants.OOB_POWER_ON:
3504
            node.powered = True
3505
          elif self.op.command == constants.OOB_POWER_OFF:
3506
            node.powered = False
3507
          elif self.op.command == constants.OOB_POWER_STATUS:
3508
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3509
            if powered != node.powered:
3510
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3511
                               " match actual power state (%s)"), node.powered,
3512
                              node.name, powered)
3513

    
3514
          # For configuration changing commands we should update the node
3515
          if self.op.command in (constants.OOB_POWER_ON,
3516
                                 constants.OOB_POWER_OFF):
3517
            self.cfg.Update(node, feedback_fn)
3518

    
3519
          node_entry.append((constants.RS_NORMAL, result.payload))
3520

    
3521
          if (self.op.command == constants.OOB_POWER_ON and
3522
              idx < len(self.nodes) - 1):
3523
            time.sleep(self.op.power_delay)
3524

    
3525
    return ret
3526

    
3527
  def _CheckPayload(self, result):
3528
    """Checks if the payload is valid.
3529

3530
    @param result: RPC result
3531
    @raises errors.OpExecError: If payload is not valid
3532

3533
    """
3534
    errs = []
3535
    if self.op.command == constants.OOB_HEALTH:
3536
      if not isinstance(result.payload, list):
3537
        errs.append("command 'health' is expected to return a list but got %s" %
3538
                    type(result.payload))
3539
      else:
3540
        for item, status in result.payload:
3541
          if status not in constants.OOB_STATUSES:
3542
            errs.append("health item '%s' has invalid status '%s'" %
3543
                        (item, status))
3544

    
3545
    if self.op.command == constants.OOB_POWER_STATUS:
3546
      if not isinstance(result.payload, dict):
3547
        errs.append("power-status is expected to return a dict but got %s" %
3548
                    type(result.payload))
3549

    
3550
    if self.op.command in [
3551
        constants.OOB_POWER_ON,
3552
        constants.OOB_POWER_OFF,
3553
        constants.OOB_POWER_CYCLE,
3554
        ]:
3555
      if result.payload is not None:
3556
        errs.append("%s is expected to not return payload but got '%s'" %
3557
                    (self.op.command, result.payload))
3558

    
3559
    if errs:
3560
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3561
                               utils.CommaJoin(errs))
3562

    
3563
class _OsQuery(_QueryBase):
3564
  FIELDS = query.OS_FIELDS
3565

    
3566
  def ExpandNames(self, lu):
3567
    # Lock all nodes in shared mode
3568
    # Temporary removal of locks, should be reverted later
3569
    # TODO: reintroduce locks when they are lighter-weight
3570
    lu.needed_locks = {}
3571
    #self.share_locks[locking.LEVEL_NODE] = 1
3572
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3573

    
3574
    # The following variables interact with _QueryBase._GetNames
3575
    if self.names:
3576
      self.wanted = self.names
3577
    else:
3578
      self.wanted = locking.ALL_SET
3579

    
3580
    self.do_locking = self.use_locking
3581

    
3582
  def DeclareLocks(self, lu, level):
3583
    pass
3584

    
3585
  @staticmethod
3586
  def _DiagnoseByOS(rlist):
3587
    """Remaps a per-node return list into an a per-os per-node dictionary
3588

3589
    @param rlist: a map with node names as keys and OS objects as values
3590

3591
    @rtype: dict
3592
    @return: a dictionary with osnames as keys and as value another
3593
        map, with nodes as keys and tuples of (path, status, diagnose,
3594
        variants, parameters, api_versions) as values, eg::
3595

3596
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3597
                                     (/srv/..., False, "invalid api")],
3598
                           "node2": [(/srv/..., True, "", [], [])]}
3599
          }
3600

3601
    """
3602
    all_os = {}
3603
    # we build here the list of nodes that didn't fail the RPC (at RPC
3604
    # level), so that nodes with a non-responding node daemon don't
3605
    # make all OSes invalid
3606
    good_nodes = [node_name for node_name in rlist
3607
                  if not rlist[node_name].fail_msg]
3608
    for node_name, nr in rlist.items():
3609
      if nr.fail_msg or not nr.payload:
3610
        continue
3611
      for (name, path, status, diagnose, variants,
3612
           params, api_versions) in nr.payload:
3613
        if name not in all_os:
3614
          # build a list of nodes for this os containing empty lists
3615
          # for each node in node_list
3616
          all_os[name] = {}
3617
          for nname in good_nodes:
3618
            all_os[name][nname] = []
3619
        # convert params from [name, help] to (name, help)
3620
        params = [tuple(v) for v in params]
3621
        all_os[name][node_name].append((path, status, diagnose,
3622
                                        variants, params, api_versions))
3623
    return all_os
3624

    
3625
  def _GetQueryData(self, lu):
3626
    """Computes the list of nodes and their attributes.
3627

3628
    """
3629
    # Locking is not used
3630
    assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3631

    
3632
    valid_nodes = [node.name
3633
                   for node in lu.cfg.GetAllNodesInfo().values()
3634
                   if not node.offline and node.vm_capable]
3635
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3636
    cluster = lu.cfg.GetClusterInfo()
3637

    
3638
    data = {}
3639

    
3640
    for (os_name, os_data) in pol.items():
3641
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3642
                          hidden=(os_name in cluster.hidden_os),
3643
                          blacklisted=(os_name in cluster.blacklisted_os))
3644

    
3645
      variants = set()
3646
      parameters = set()
3647
      api_versions = set()
3648

    
3649
      for idx, osl in enumerate(os_data.values()):
3650
        info.valid = bool(info.valid and osl and osl[0][1])
3651
        if not info.valid:
3652
          break
3653

    
3654
        (node_variants, node_params, node_api) = osl[0][3:6]
3655
        if idx == 0:
3656
          # First entry
3657
          variants.update(node_variants)
3658
          parameters.update(node_params)
3659
          api_versions.update(node_api)
3660
        else:
3661
          # Filter out inconsistent values
3662
          variants.intersection_update(node_variants)
3663
          parameters.intersection_update(node_params)
3664
          api_versions.intersection_update(node_api)
3665

    
3666
      info.variants = list(variants)
3667
      info.parameters = list(parameters)
3668
      info.api_versions = list(api_versions)
3669

    
3670
      data[os_name] = info
3671

    
3672
    # Prepare data in requested order
3673
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3674
            if name in data]
3675

    
3676

    
3677
class LUOsDiagnose(NoHooksLU):
3678
  """Logical unit for OS diagnose/query.
3679

3680
  """
3681
  REQ_BGL = False
3682

    
3683
  @staticmethod
3684
  def _BuildFilter(fields, names):
3685
    """Builds a filter for querying OSes.
3686

3687
    """
3688
    name_filter = qlang.MakeSimpleFilter("name", names)
3689

    
3690
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3691
    # respective field is not requested
3692
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3693
                     for fname in ["hidden", "blacklisted"]
3694
                     if fname not in fields]
3695
    if "valid" not in fields:
3696
      status_filter.append([qlang.OP_TRUE, "valid"])
3697

    
3698
    if status_filter:
3699
      status_filter.insert(0, qlang.OP_AND)
3700
    else:
3701
      status_filter = None
3702

    
3703
    if name_filter and status_filter:
3704
      return [qlang.OP_AND, name_filter, status_filter]
3705
    elif name_filter:
3706
      return name_filter
3707
    else:
3708
      return status_filter
3709

    
3710
  def CheckArguments(self):
3711
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3712
                       self.op.output_fields, False)
3713

    
3714
  def ExpandNames(self):
3715
    self.oq.ExpandNames(self)
3716

    
3717
  def Exec(self, feedback_fn):
3718
    return self.oq.OldStyleQuery(self)
3719

    
3720

    
3721
class LUNodeRemove(LogicalUnit):
3722
  """Logical unit for removing a node.
3723

3724
  """
3725
  HPATH = "node-remove"
3726
  HTYPE = constants.HTYPE_NODE
3727

    
3728
  def BuildHooksEnv(self):
3729
    """Build hooks env.
3730

3731
    This doesn't run on the target node in the pre phase as a failed
3732
    node would then be impossible to remove.
3733

3734
    """
3735
    return {
3736
      "OP_TARGET": self.op.node_name,
3737
      "NODE_NAME": self.op.node_name,
3738
      }
3739

    
3740
  def BuildHooksNodes(self):
3741
    """Build hooks nodes.
3742

3743
    """
3744
    all_nodes = self.cfg.GetNodeList()
3745
    try:
3746
      all_nodes.remove(self.op.node_name)
3747
    except ValueError:
3748
      logging.warning("Node '%s', which is about to be removed, was not found"
3749
                      " in the list of all nodes", self.op.node_name)
3750
    return (all_nodes, all_nodes)
3751

    
3752
  def CheckPrereq(self):
3753
    """Check prerequisites.
3754

3755
    This checks:
3756
     - the node exists in the configuration
3757
     - it does not have primary or secondary instances
3758
     - it's not the master
3759

3760
    Any errors are signaled by raising errors.OpPrereqError.
3761

3762
    """
3763
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3764
    node = self.cfg.GetNodeInfo(self.op.node_name)
3765
    assert node is not None
3766

    
3767
    instance_list = self.cfg.GetInstanceList()
3768

    
3769
    masternode = self.cfg.GetMasterNode()
3770
    if node.name == masternode:
3771
      raise errors.OpPrereqError("Node is the master node,"
3772
                                 " you need to failover first.",
3773
                                 errors.ECODE_INVAL)
3774

    
3775
    for instance_name in instance_list:
3776
      instance = self.cfg.GetInstanceInfo(instance_name)
3777
      if node.name in instance.all_nodes:
3778
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3779
                                   " please remove first." % instance_name,
3780
                                   errors.ECODE_INVAL)
3781
    self.op.node_name = node.name
3782
    self.node = node
3783

    
3784
  def Exec(self, feedback_fn):
3785
    """Removes the node from the cluster.
3786

3787
    """
3788
    node = self.node
3789
    logging.info("Stopping the node daemon and removing configs from node %s",
3790
                 node.name)
3791

    
3792
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3793

    
3794
    # Promote nodes to master candidate as needed
3795
    _AdjustCandidatePool(self, exceptions=[node.name])
3796
    self.context.RemoveNode(node.name)
3797

    
3798
    # Run post hooks on the node before it's removed
3799
    _RunPostHook(self, node.name)
3800

    
3801
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3802
    msg = result.fail_msg
3803
    if msg:
3804
      self.LogWarning("Errors encountered on the remote node while leaving"
3805
                      " the cluster: %s", msg)
3806

    
3807
    # Remove node from our /etc/hosts
3808
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3809
      master_node = self.cfg.GetMasterNode()
3810
      result = self.rpc.call_etc_hosts_modify(master_node,
3811
                                              constants.ETC_HOSTS_REMOVE,
3812
                                              node.name, None)
3813
      result.Raise("Can't update hosts file with new host data")
3814
      _RedistributeAncillaryFiles(self)
3815

    
3816

    
3817
class _NodeQuery(_QueryBase):
3818
  FIELDS = query.NODE_FIELDS
3819

    
3820
  def ExpandNames(self, lu):
3821
    lu.needed_locks = {}
3822
    lu.share_locks[locking.LEVEL_NODE] = 1
3823

    
3824
    if self.names:
3825
      self.wanted = _GetWantedNodes(lu, self.names)
3826
    else:
3827
      self.wanted = locking.ALL_SET
3828

    
3829
    self.do_locking = (self.use_locking and
3830
                       query.NQ_LIVE in self.requested_data)
3831

    
3832
    if self.do_locking:
3833
      # if we don't request only static fields, we need to lock the nodes
3834
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3835

    
3836
  def DeclareLocks(self, lu, level):
3837
    pass
3838

    
3839
  def _GetQueryData(self, lu):
3840
    """Computes the list of nodes and their attributes.
3841

3842
    """
3843
    all_info = lu.cfg.GetAllNodesInfo()
3844

    
3845
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3846

    
3847
    # Gather data as requested
3848
    if query.NQ_LIVE in self.requested_data:
3849
      # filter out non-vm_capable nodes
3850
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3851

    
3852
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3853
                                        lu.cfg.GetHypervisorType())
3854
      live_data = dict((name, nresult.payload)
3855
                       for (name, nresult) in node_data.items()
3856
                       if not nresult.fail_msg and nresult.payload)
3857
    else:
3858
      live_data = None
3859

    
3860
    if query.NQ_INST in self.requested_data:
3861
      node_to_primary = dict([(name, set()) for name in nodenames])
3862
      node_to_secondary = dict([(name, set()) for name in nodenames])
3863

    
3864
      inst_data = lu.cfg.GetAllInstancesInfo()
3865

    
3866
      for inst in inst_data.values():
3867
        if inst.primary_node in node_to_primary:
3868
          node_to_primary[inst.primary_node].add(inst.name)
3869
        for secnode in inst.secondary_nodes:
3870
          if secnode in node_to_secondary:
3871
            node_to_secondary[secnode].add(inst.name)
3872
    else:
3873
      node_to_primary = None
3874
      node_to_secondary = None
3875

    
3876
    if query.NQ_OOB in self.requested_data:
3877
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3878
                         for name, node in all_info.iteritems())
3879
    else:
3880
      oob_support = None
3881

    
3882
    if query.NQ_GROUP in self.requested_data:
3883
      groups = lu.cfg.GetAllNodeGroupsInfo()
3884
    else:
3885
      groups = {}
3886

    
3887
    return query.NodeQueryData([all_info[name] for name in nodenames],
3888
                               live_data, lu.cfg.GetMasterNode(),
3889
                               node_to_primary, node_to_secondary, groups,
3890
                               oob_support, lu.cfg.GetClusterInfo())
3891

    
3892

    
3893
class LUNodeQuery(NoHooksLU):
3894
  """Logical unit for querying nodes.
3895

3896
  """
3897
  # pylint: disable-msg=W0142
3898
  REQ_BGL = False
3899

    
3900
  def CheckArguments(self):
3901
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3902
                         self.op.output_fields, self.op.use_locking)
3903

    
3904
  def ExpandNames(self):
3905
    self.nq.ExpandNames(self)
3906

    
3907
  def Exec(self, feedback_fn):
3908
    return self.nq.OldStyleQuery(self)
3909

    
3910

    
3911
class LUNodeQueryvols(NoHooksLU):
3912
  """Logical unit for getting volumes on node(s).
3913

3914
  """
3915
  REQ_BGL = False
3916
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3917
  _FIELDS_STATIC = utils.FieldSet("node")
3918

    
3919
  def CheckArguments(self):
3920
    _CheckOutputFields(static=self._FIELDS_STATIC,
3921
                       dynamic=self._FIELDS_DYNAMIC,
3922
                       selected=self.op.output_fields)
3923

    
3924
  def ExpandNames(self):
3925
    self.needed_locks = {}
3926
    self.share_locks[locking.LEVEL_NODE] = 1
3927
    if not self.op.nodes:
3928
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3929
    else:
3930
      self.needed_locks[locking.LEVEL_NODE] = \
3931
        _GetWantedNodes(self, self.op.nodes)
3932

    
3933
  def Exec(self, feedback_fn):
3934
    """Computes the list of nodes and their attributes.
3935

3936
    """
3937
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3938
    volumes = self.rpc.call_node_volumes(nodenames)
3939

    
3940
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3941
             in self.cfg.GetInstanceList()]
3942

    
3943
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3944

    
3945
    output = []
3946
    for node in nodenames:
3947
      nresult = volumes[node]
3948
      if nresult.offline:
3949
        continue
3950
      msg = nresult.fail_msg
3951
      if msg:
3952
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3953
        continue
3954

    
3955
      node_vols = nresult.payload[:]
3956
      node_vols.sort(key=lambda vol: vol['dev'])
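      # present each node's volumes in a stable order (sorted by physical
      # device name)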
3957

    
3958
      for vol in node_vols:
3959
        node_output = []
3960
        for field in self.op.output_fields:
3961
          if field == "node":
3962
            val = node
3963
          elif field == "phys":
3964
            val = vol['dev']
3965
          elif field == "vg":
3966
            val = vol['vg']
3967
          elif field == "name":
3968
            val = vol['name']
3969
          elif field == "size":
3970
            val = int(float(vol['size']))
3971
          elif field == "instance":
3972
            for inst in ilist:
3973
              if node not in lv_by_node[inst]:
3974
                continue
3975
              if vol['name'] in lv_by_node[inst][node]:
3976
                val = inst.name
3977
                break
3978
            else:
3979
              val = '-'
3980
          else:
3981
            raise errors.ParameterError(field)
3982
          node_output.append(str(val))
3983

    
3984
        output.append(node_output)
3985

    
3986
    return output
3987

    
3988

    
3989
class LUNodeQueryStorage(NoHooksLU):
3990
  """Logical unit for getting information on storage units on node(s).
3991

3992
  """
3993
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3994
  REQ_BGL = False
3995

    
3996
  def CheckArguments(self):
3997
    _CheckOutputFields(static=self._FIELDS_STATIC,
3998
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3999
                       selected=self.op.output_fields)
4000

    
4001
  def ExpandNames(self):
4002
    self.needed_locks = {}
4003
    self.share_locks[locking.LEVEL_NODE] = 1
4004

    
4005
    if self.op.nodes:
4006
      self.needed_locks[locking.LEVEL_NODE] = \
4007
        _GetWantedNodes(self, self.op.nodes)
4008
    else:
4009
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4010

    
4011
  def Exec(self, feedback_fn):
4012
    """Computes the list of nodes and their attributes.
4013

4014
    """
4015
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4016

    
4017
    # Always get name to sort by
4018
    if constants.SF_NAME in self.op.output_fields:
4019
      fields = self.op.output_fields[:]
4020
    else:
4021
      fields = [constants.SF_NAME] + self.op.output_fields
4022

    
4023
    # Never ask for node or type as it's only known to the LU
4024
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4025
      while extra in fields:
4026
        fields.remove(extra)
4027

    
4028
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4029
    name_idx = field_idx[constants.SF_NAME]
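    # the name column is used below to key and sort the rows returned by
    # each node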
4030

    
4031
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4032
    data = self.rpc.call_storage_list(self.nodes,
4033
                                      self.op.storage_type, st_args,
4034
                                      self.op.name, fields)
4035

    
4036
    result = []
4037

    
4038
    for node in utils.NiceSort(self.nodes):
4039
      nresult = data[node]
4040
      if nresult.offline:
4041
        continue
4042

    
4043
      msg = nresult.fail_msg
4044
      if msg:
4045
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4046
        continue
4047

    
4048
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4049

    
4050
      for name in utils.NiceSort(rows.keys()):
4051
        row = rows[name]
4052

    
4053
        out = []
4054

    
4055
        for field in self.op.output_fields:
4056
          if field == constants.SF_NODE:
4057
            val = node
4058
          elif field == constants.SF_TYPE:
4059
            val = self.op.storage_type
4060
          elif field in field_idx:
4061
            val = row[field_idx[field]]
4062
          else:
4063
            raise errors.ParameterError(field)
4064

    
4065
          out.append(val)
4066

    
4067
        result.append(out)
4068

    
4069
    return result
4070

    
4071

    
4072
class _InstanceQuery(_QueryBase):
4073
  FIELDS = query.INSTANCE_FIELDS
4074

    
4075
  def ExpandNames(self, lu):
4076
    lu.needed_locks = {}
4077
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4078
    lu.share_locks[locking.LEVEL_NODE] = 1
4079

    
4080
    if self.names:
4081
      self.wanted = _GetWantedInstances(lu, self.names)
4082
    else:
4083
      self.wanted = locking.ALL_SET
4084

    
4085
    self.do_locking = (self.use_locking and
4086
                       query.IQ_LIVE in self.requested_data)
4087
    if self.do_locking:
4088
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4089
      lu.needed_locks[locking.LEVEL_NODE] = []
4090
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
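      # node locks are recalculated from the acquired instance locks in
      # DeclareLocks via _LockInstancesNodes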
4091

    
4092
  def DeclareLocks(self, lu, level):
4093
    if level == locking.LEVEL_NODE and self.do_locking:
4094
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4095

    
4096
  def _GetQueryData(self, lu):
4097
    """Computes the list of instances and their attributes.
4098

4099
    """
4100
    cluster = lu.cfg.GetClusterInfo()
4101
    all_info = lu.cfg.GetAllInstancesInfo()
4102

    
4103
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4104

    
4105
    instance_list = [all_info[name] for name in instance_names]
4106
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4107
                                        for inst in instance_list)))
4108
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4109
    bad_nodes = []
4110
    offline_nodes = []
4111
    wrongnode_inst = set()
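    # instances reported as running by a node which is not their primary node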
4112

    
4113
    # Gather data as requested
4114
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4115
      live_data = {}
4116
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4117
      for name in nodes:
4118
        result = node_data[name]
4119
        if result.offline:
4120
          # offline nodes will be in both lists
4121
          assert result.fail_msg
4122
          offline_nodes.append(name)
4123
        if result.fail_msg:
4124
          bad_nodes.append(name)
4125
        elif result.payload:
4126
          for inst in result.payload:
4127
            if all_info[inst].primary_node == name:
4128
              live_data.update(result.payload)
4129
            else:
4130
              wrongnode_inst.add(inst)
4131
        # else no instance is alive
4132
    else:
4133
      live_data = {}
4134

    
4135
    if query.IQ_DISKUSAGE in self.requested_data:
4136
      disk_usage = dict((inst.name,
4137
                         _ComputeDiskSize(inst.disk_template,
4138
                                          [{constants.IDISK_SIZE: disk.size}
4139
                                           for disk in inst.disks]))
4140
                        for inst in instance_list)
4141
    else:
4142
      disk_usage = None
4143

    
4144
    if query.IQ_CONSOLE in self.requested_data:
4145
      consinfo = {}
4146
      for inst in instance_list:
4147
        if inst.name in live_data:
4148
          # Instance is running
4149
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4150
        else:
4151
          consinfo[inst.name] = None
4152
      assert set(consinfo.keys()) == set(instance_names)
4153
    else:
4154
      consinfo = None
4155

    
4156
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4157
                                   disk_usage, offline_nodes, bad_nodes,
4158
                                   live_data, wrongnode_inst, consinfo)
4159

    
4160

    
4161
class LUQuery(NoHooksLU):
4162
  """Query for resources/items of a certain kind.
4163

4164
  """
4165
  # pylint: disable-msg=W0142
4166
  REQ_BGL = False
4167

    
4168
  def CheckArguments(self):
4169
    qcls = _GetQueryImplementation(self.op.what)
4170

    
4171
    self.impl = qcls(self.op.filter, self.op.fields, False)
4172

    
4173
  def ExpandNames(self):
4174
    self.impl.ExpandNames(self)
4175

    
4176
  def DeclareLocks(self, level):
4177
    self.impl.DeclareLocks(self, level)
4178

    
4179
  def Exec(self, feedback_fn):
4180
    return self.impl.NewStyleQuery(self)
4181

    
4182

    
4183
class LUQueryFields(NoHooksLU):
4184
  """Query for resources/items of a certain kind.
4185

4186
  """
4187
  # pylint: disable-msg=W0142
4188
  REQ_BGL = False
4189

    
4190
  def CheckArguments(self):
4191
    self.qcls = _GetQueryImplementation(self.op.what)
4192

    
4193
  def ExpandNames(self):
4194
    self.needed_locks = {}
4195

    
4196
  def Exec(self, feedback_fn):
4197
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4198

    
4199

    
4200
class LUNodeModifyStorage(NoHooksLU):
4201
  """Logical unit for modifying a storage volume on a node.
4202

4203
  """
4204
  REQ_BGL = False
4205

    
4206
  def CheckArguments(self):
4207
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4208

    
4209
    storage_type = self.op.storage_type
4210

    
4211
    try:
4212
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4213
    except KeyError:
4214
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4215
                                 " modified" % storage_type,
4216
                                 errors.ECODE_INVAL)
4217

    
4218
    diff = set(self.op.changes.keys()) - modifiable
4219
    if diff:
4220
      raise errors.OpPrereqError("The following fields can not be modified for"
4221
                                 " storage units of type '%s': %r" %
4222
                                 (storage_type, list(diff)),
4223
                                 errors.ECODE_INVAL)
4224

    
4225
  def ExpandNames(self):
4226
    self.needed_locks = {
4227
      locking.LEVEL_NODE: self.op.node_name,
4228
      }
4229

    
4230
  def Exec(self, feedback_fn):
4231
    """Computes the list of nodes and their attributes.
4232

4233
    """
4234
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4235
    result = self.rpc.call_storage_modify(self.op.node_name,
4236
                                          self.op.storage_type, st_args,
4237
                                          self.op.name, self.op.changes)
4238
    result.Raise("Failed to modify storage unit '%s' on %s" %
4239
                 (self.op.name, self.op.node_name))
4240

    
4241

    
4242
class LUNodeAdd(LogicalUnit):
4243
  """Logical unit for adding node to the cluster.
4244

4245
  """
4246
  HPATH = "node-add"
4247
  HTYPE = constants.HTYPE_NODE
4248
  _NFLAGS = ["master_capable", "vm_capable"]
4249

    
4250
  def CheckArguments(self):
4251
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4252
    # validate/normalize the node name
4253
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4254
                                         family=self.primary_ip_family)
4255
    self.op.node_name = self.hostname.name
4256
    if self.op.readd and self.op.group:
4257
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4258
                                 " being readded", errors.ECODE_INVAL)
4259

    
4260
  def BuildHooksEnv(self):
4261
    """Build hooks env.
4262

4263
    This will run on all nodes before, and on all nodes + the new node after.
4264

4265
    """
4266
    return {
4267
      "OP_TARGET": self.op.node_name,
4268
      "NODE_NAME": self.op.node_name,
4269
      "NODE_PIP": self.op.primary_ip,
4270
      "NODE_SIP": self.op.secondary_ip,
4271
      "MASTER_CAPABLE": str(self.op.master_capable),
4272
      "VM_CAPABLE": str(self.op.vm_capable),
4273
      }
4274

    
4275
  def BuildHooksNodes(self):
4276
    """Build hooks nodes.
4277

4278
    """
4279
    # Exclude added node
4280
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4281
    post_nodes = pre_nodes + [self.op.node_name, ]
4282

    
4283
    return (pre_nodes, post_nodes)
4284

    
4285
  def CheckPrereq(self):
4286
    """Check prerequisites.
4287

4288
    This checks:
4289
     - the new node is not already in the config
4290
     - it is resolvable
4291
     - its parameters (single/dual homed) matches the cluster
4292

4293
    Any errors are signaled by raising errors.OpPrereqError.
4294

4295
    """
4296
    cfg = self.cfg
4297
    hostname = self.hostname
4298
    node = hostname.name
4299
    primary_ip = self.op.primary_ip = hostname.ip
4300
    if self.op.secondary_ip is None:
4301
      if self.primary_ip_family == netutils.IP6Address.family:
4302
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
4305
      self.op.secondary_ip = primary_ip
4306

    
4307
    secondary_ip = self.op.secondary_ip
4308
    if not netutils.IP4Address.IsValid(secondary_ip):
4309
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4310
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4311

    
4312
    node_list = cfg.GetNodeList()
4313
    if not self.op.readd and node in node_list:
4314
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4315
                                 node, errors.ECODE_EXISTS)
4316
    elif self.op.readd and node not in node_list:
4317
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4318
                                 errors.ECODE_NOENT)
4319

    
4320
    self.changed_primary_ip = False
4321

    
4322
    for existing_node_name in node_list:
4323
      existing_node = cfg.GetNodeInfo(existing_node_name)
4324

    
4325
      if self.op.readd and node == existing_node_name:
4326
        if existing_node.secondary_ip != secondary_ip:
4327
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4328
                                     " address configuration as before",
4329
                                     errors.ECODE_INVAL)
4330
        if existing_node.primary_ip != primary_ip:
4331
          self.changed_primary_ip = True
4332

    
4333
        continue
4334

    
4335
      if (existing_node.primary_ip == primary_ip or
4336
          existing_node.secondary_ip == primary_ip or
4337
          existing_node.primary_ip == secondary_ip or
4338
          existing_node.secondary_ip == secondary_ip):
4339
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4340
                                   " existing node %s" % existing_node.name,
4341
                                   errors.ECODE_NOTUNIQUE)
4342

    
4343
    # After this 'if' block, None is no longer a valid value for the
4344
    # _capable op attributes
4345
    if self.op.readd:
4346
      old_node = self.cfg.GetNodeInfo(node)
4347
      assert old_node is not None, "Can't retrieve locked node %s" % node
4348
      for attr in self._NFLAGS:
4349
        if getattr(self.op, attr) is None:
4350
          setattr(self.op, attr, getattr(old_node, attr))
4351
    else:
4352
      for attr in self._NFLAGS:
4353
        if getattr(self.op, attr) is None:
4354
          setattr(self.op, attr, True)
4355

    
4356
    if self.op.readd and not self.op.vm_capable:
4357
      pri, sec = cfg.GetNodeInstances(node)
4358
      if pri or sec:
4359
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4360
                                   " flag set to false, but it already holds"
4361
                                   " instances" % node,
4362
                                   errors.ECODE_STATE)
4363

    
4364
    # check that the type of the node (single versus dual homed) is the
4365
    # same as for the master
4366
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4367
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4368
    newbie_singlehomed = secondary_ip == primary_ip
4369
    if master_singlehomed != newbie_singlehomed:
4370
      if master_singlehomed:
4371
        raise errors.OpPrereqError("The master has no secondary ip but the"
4372
                                   " new node has one",
4373
                                   errors.ECODE_INVAL)
4374
      else:
4375
        raise errors.OpPrereqError("The master has a secondary ip but the"
4376
                                   " new node doesn't have one",
4377
                                   errors.ECODE_INVAL)
4378

    
4379
    # checks reachability
4380
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4381
      raise errors.OpPrereqError("Node not reachable by ping",
4382
                                 errors.ECODE_ENVIRON)
4383

    
4384
    if not newbie_singlehomed:
4385
      # check reachability from my secondary ip to newbie's secondary ip
4386
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4387
                           source=myself.secondary_ip):
4388
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4389
                                   " based ping to node daemon port",
4390
                                   errors.ECODE_ENVIRON)
4391

    
4392
    if self.op.readd:
4393
      exceptions = [node]
4394
    else:
4395
      exceptions = []
4396

    
4397
    if self.op.master_capable:
4398
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4399
    else:
4400
      self.master_candidate = False
4401

    
4402
    if self.op.readd:
4403
      self.new_node = old_node
4404
    else:
4405
      node_group = cfg.LookupNodeGroup(self.op.group)
4406
      self.new_node = objects.Node(name=node,
4407
                                   primary_ip=primary_ip,
4408
                                   secondary_ip=secondary_ip,
4409
                                   master_candidate=self.master_candidate,
4410
                                   offline=False, drained=False,
4411
                                   group=node_group)
4412

    
4413
    if self.op.ndparams:
4414
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4415

    
4416
  def Exec(self, feedback_fn):
4417
    """Adds the new node to the cluster.
4418

4419
    """
4420
    new_node = self.new_node
4421
    node = new_node.name
4422

    
4423
    # We are adding a new node, so we assume it's powered
4424
    new_node.powered = True
4425

    
4426
    # for re-adds, reset the offline/drained/master-candidate flags;
4427
    # we need to reset here, otherwise offline would prevent RPC calls
4428
    # later in the procedure; this also means that if the re-add
4429
    # fails, we are left with a non-offlined, broken node
4430
    if self.op.readd:
4431
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4432
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4433
      # if we demote the node, we do cleanup later in the procedure
4434
      new_node.master_candidate = self.master_candidate
4435
      if self.changed_primary_ip:
4436
        new_node.primary_ip = self.op.primary_ip
4437

    
4438
    # copy the master/vm_capable flags
4439
    for attr in self._NFLAGS:
4440
      setattr(new_node, attr, getattr(self.op, attr))
4441

    
4442
    # notify the user about any possible mc promotion
4443
    if new_node.master_candidate:
4444
      self.LogInfo("Node will be a master candidate")
4445

    
4446
    if self.op.ndparams:
4447
      new_node.ndparams = self.op.ndparams
4448
    else:
4449
      new_node.ndparams = {}
4450

    
4451
    # check connectivity
4452
    result = self.rpc.call_version([node])[node]
4453
    result.Raise("Can't get version information from node %s" % node)
4454
    if constants.PROTOCOL_VERSION == result.payload:
4455
      logging.info("Communication to node %s fine, sw version %s matches",
4456
                   node, result.payload)
4457
    else:
4458
      raise errors.OpExecError("Version mismatch master version %s,"
4459
                               " node version %s" %
4460
                               (constants.PROTOCOL_VERSION, result.payload))
4461

    
4462
    # Add node to our /etc/hosts, and add key to known_hosts
4463
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4464
      master_node = self.cfg.GetMasterNode()
4465
      result = self.rpc.call_etc_hosts_modify(master_node,
4466
                                              constants.ETC_HOSTS_ADD,
4467
                                              self.hostname.name,
4468
                                              self.hostname.ip)
4469
      result.Raise("Can't update hosts file with new host data")
4470

    
4471
    if new_node.secondary_ip != new_node.primary_ip:
4472
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4473
                               False)
4474

    
4475
    node_verify_list = [self.cfg.GetMasterNode()]
4476
    node_verify_param = {
4477
      constants.NV_NODELIST: [node],
4478
      # TODO: do a node-net-test as well?
4479
    }
4480

    
4481
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4482
                                       self.cfg.GetClusterName())
4483
    for verifier in node_verify_list:
4484
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4485
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4486
      if nl_payload:
4487
        for failed in nl_payload:
4488
          feedback_fn("ssh/hostname verification failed"
4489
                      " (checking from %s): %s" %
4490
                      (verifier, nl_payload[failed]))
4491
        raise errors.OpExecError("ssh/hostname verification failed.")
4492

    
4493
    if self.op.readd:
4494
      _RedistributeAncillaryFiles(self)
4495
      self.context.ReaddNode(new_node)
4496
      # make sure we redistribute the config
4497
      self.cfg.Update(new_node, feedback_fn)
4498
      # and make sure the new node will not have old files around
4499
      if not new_node.master_candidate:
4500
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4501
        msg = result.fail_msg
4502
        if msg:
4503
          self.LogWarning("Node failed to demote itself from master"
4504
                          " candidate status: %s" % msg)
4505
    else:
4506
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4507
                                  additional_vm=self.op.vm_capable)
4508
      self.context.AddNode(new_node, self.proc.GetECId())
4509

    
4510

    
4511
class LUNodeSetParams(LogicalUnit):
4512
  """Modifies the parameters of a node.
4513

4514
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4515
      to the node role (as _ROLE_*)
4516
  @cvar _R2F: a dictionary from node role to tuples of flags
4517
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4518

4519
  """
4520
  HPATH = "node-modify"
4521
  HTYPE = constants.HTYPE_NODE
4522
  REQ_BGL = False
4523
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4524
  _F2R = {
4525
    (True, False, False): _ROLE_CANDIDATE,
4526
    (False, True, False): _ROLE_DRAINED,
4527
    (False, False, True): _ROLE_OFFLINE,
4528
    (False, False, False): _ROLE_REGULAR,
4529
    }
4530
  _R2F = dict((v, k) for k, v in _F2R.items())
4531
  _FLAGS = ["master_candidate", "drained", "offline"]
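  # For illustration: a node whose flags are (master_candidate=True,
  # drained=False, offline=False) maps via _F2R to _ROLE_CANDIDATE, and
  # _R2F[_ROLE_CANDIDATE] gives back (True, False, False); _FLAGS lists the
  # corresponding attribute names in the same tuple order.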
4532

    
4533
  def CheckArguments(self):
4534
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4535
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4536
                self.op.master_capable, self.op.vm_capable,
4537
                self.op.secondary_ip, self.op.ndparams]
4538
    if all_mods.count(None) == len(all_mods):
4539
      raise errors.OpPrereqError("Please pass at least one modification",
4540
                                 errors.ECODE_INVAL)
4541
    if all_mods.count(True) > 1:
4542
      raise errors.OpPrereqError("Can't set the node into more than one"
4543
                                 " state at the same time",
4544
                                 errors.ECODE_INVAL)
4545

    
4546
    # Boolean value that tells us whether we might be demoting from MC
4547
    self.might_demote = (self.op.master_candidate == False or
4548
                         self.op.offline == True or
4549
                         self.op.drained == True or
4550
                         self.op.master_capable == False)
4551

    
4552
    if self.op.secondary_ip:
4553
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4554
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4555
                                   " address" % self.op.secondary_ip,
4556
                                   errors.ECODE_INVAL)
4557

    
4558
    self.lock_all = self.op.auto_promote and self.might_demote
4559
    self.lock_instances = self.op.secondary_ip is not None
4560

    
4561
  def ExpandNames(self):
4562
    if self.lock_all:
4563
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4564
    else:
4565
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4566

    
4567
    if self.lock_instances:
4568
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4569

    
4570
  def DeclareLocks(self, level):
4571
    # If we have locked all instances, before waiting to lock nodes, release
4572
    # all the ones living on nodes unrelated to the current operation.
4573
    if level == locking.LEVEL_NODE and self.lock_instances:
4574
      instances_release = []
4575
      instances_keep = []
4576
      self.affected_instances = []
4577
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4578
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4579
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4580
          i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4581
          if i_mirrored and self.op.node_name in instance.all_nodes:
4582
            instances_keep.append(instance_name)
4583
            self.affected_instances.append(instance)
4584
          else:
4585
            instances_release.append(instance_name)
4586
        if instances_release:
4587
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4588
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4589

    
4590
  def BuildHooksEnv(self):
4591
    """Build hooks env.
4592

4593
    This runs on the master node.
4594

4595
    """
4596
    return {
4597
      "OP_TARGET": self.op.node_name,
4598
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4599
      "OFFLINE": str(self.op.offline),
4600
      "DRAINED": str(self.op.drained),
4601
      "MASTER_CAPABLE": str(self.op.master_capable),
4602
      "VM_CAPABLE": str(self.op.vm_capable),
4603
      }
4604

    
4605
  def BuildHooksNodes(self):
4606
    """Build hooks nodes.
4607

4608
    """
4609
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
4610
    return (nl, nl)
4611

    
4612
  def CheckPrereq(self):
4613
    """Check prerequisites.
4614

4615
    This only checks the instance list against the existing names.
4616

4617
    """
4618
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4619

    
4620
    if (self.op.master_candidate is not None or
4621
        self.op.drained is not None or
4622
        self.op.offline is not None):
4623
      # we can't change the master's node flags
4624
      if self.op.node_name == self.cfg.GetMasterNode():
4625
        raise errors.OpPrereqError("The master role can be changed"
4626
                                   " only via master-failover",
4627
                                   errors.ECODE_INVAL)
4628

    
4629
    if self.op.master_candidate and not node.master_capable:
4630
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4631
                                 " it a master candidate" % node.name,
4632
                                 errors.ECODE_STATE)
4633

    
4634
    if self.op.vm_capable == False:
4635
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4636
      if ipri or isec:
4637
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4638
                                   " the vm_capable flag" % node.name,
4639
                                   errors.ECODE_STATE)
4640

    
4641
    if node.master_candidate and self.might_demote and not self.lock_all:
4642
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
4643
      # check if after removing the current node, we're missing master
4644
      # candidates
4645
      (mc_remaining, mc_should, _) = \
4646
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4647
      if mc_remaining < mc_should:
4648
        raise errors.OpPrereqError("Not enough master candidates, please"
4649
                                   " pass auto promote option to allow"
4650
                                   " promotion", errors.ECODE_STATE)
4651

    
4652
    self.old_flags = old_flags = (node.master_candidate,
4653
                                  node.drained, node.offline)
4654
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4655
    self.old_role = old_role = self._F2R[old_flags]
4656

    
4657
    # Check for ineffective changes
4658
    for attr in self._FLAGS:
4659
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4660
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4661
        setattr(self.op, attr, None)
4662

    
4663
    # Past this point, any flag change to False means a transition
4664
    # away from the respective state, as only real changes are kept
4665

    
4666
    # TODO: We might query the real power state if it supports OOB
4667
    if _SupportsOob(self.cfg, node):
4668
      if self.op.offline is False and not (node.powered or
4669
                                           self.op.powered == True):
4670
        raise errors.OpPrereqError(("Please power on node %s first before you"
4671
                                    " can reset offline state") %
4672
                                   self.op.node_name)
4673
    elif self.op.powered is not None:
4674
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4675
                                  " which does not support out-of-band"
4676
                                  " handling") % self.op.node_name)
4677

    
4678
    # If we're being deofflined/drained, we'll MC ourself if needed
4679
    if (self.op.drained == False or self.op.offline == False or
4680
        (self.op.master_capable and not node.master_capable)):
4681
      if _DecideSelfPromotion(self):
4682
        self.op.master_candidate = True
4683
        self.LogInfo("Auto-promoting node to master candidate")
4684

    
4685
    # If we're no longer master capable, we'll demote ourselves from MC
4686
    if self.op.master_capable == False and node.master_candidate:
4687
      self.LogInfo("Demoting from master candidate")
4688
      self.op.master_candidate = False
4689

    
4690
    # Compute new role
4691
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4692
    if self.op.master_candidate:
4693
      new_role = self._ROLE_CANDIDATE
4694
    elif self.op.drained:
4695
      new_role = self._ROLE_DRAINED
4696
    elif self.op.offline:
4697
      new_role = self._ROLE_OFFLINE
4698
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4699
      # False is still in new flags, which means we're un-setting (the
4700
      # only) True flag
4701
      new_role = self._ROLE_REGULAR
4702
    else: # no new flags, nothing, keep old role
4703
      new_role = old_role
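    # Worked example: offlining a regular node (op.offline=True, the other
    # flags left at None) selects _ROLE_OFFLINE above; clearing the flag
    # again later (op.offline=False) leaves only a False entry, so the
    # "False in [...]" branch picks _ROLE_REGULAR, assuming the
    # self-promotion check above did not set master_candidate in the
    # meantime.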
4704

    
4705
    self.new_role = new_role
4706

    
4707
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4708
      # Trying to transition out of offline status
4709
      result = self.rpc.call_version([node.name])[node.name]
4710
      if result.fail_msg:
4711
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4712
                                   " to report its version: %s" %
4713
                                   (node.name, result.fail_msg),
4714
                                   errors.ECODE_STATE)
4715
      else:
4716
        self.LogWarning("Transitioning node from offline to online state"
4717
                        " without using re-add. Please make sure the node"
4718
                        " is healthy!")
4719

    
4720
    if self.op.secondary_ip:
4721
      # Ok even without locking, because this can't be changed by any LU
4722
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4723
      master_singlehomed = master.secondary_ip == master.primary_ip
4724
      if master_singlehomed and self.op.secondary_ip:
4725
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4726
                                   " homed cluster", errors.ECODE_INVAL)
4727

    
4728
      if node.offline:
4729
        if self.affected_instances:
4730
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4731
                                     " node has instances (%s) configured"
4732
                                     " to use it" % self.affected_instances)
4733
      else:
4734
        # On online nodes, check that no instances are running, and that
4735
        # the node has the new ip and we can reach it.
4736
        for instance in self.affected_instances:
4737
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4738

    
4739
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4740
        if master.name != node.name:
4741
          # check reachability from master secondary ip to new secondary ip
4742
          if not netutils.TcpPing(self.op.secondary_ip,
4743
                                  constants.DEFAULT_NODED_PORT,
4744
                                  source=master.secondary_ip):
4745
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4746
                                       " based ping to node daemon port",
4747
                                       errors.ECODE_ENVIRON)
4748

    
4749
    if self.op.ndparams:
4750
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4751
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4752
      self.new_ndparams = new_ndparams
4753

    
4754
  def Exec(self, feedback_fn):
4755
    """Modifies a node.
4756

4757
    """
4758
    node = self.node
4759
    old_role = self.old_role
4760
    new_role = self.new_role
4761

    
4762
    result = []
4763

    
4764
    if self.op.ndparams:
4765
      node.ndparams = self.new_ndparams
4766

    
4767
    if self.op.powered is not None:
4768
      node.powered = self.op.powered
4769

    
4770
    for attr in ["master_capable", "vm_capable"]:
4771
      val = getattr(self.op, attr)
4772
      if val is not None:
4773
        setattr(node, attr, val)
4774
        result.append((attr, str(val)))
4775

    
4776
    if new_role != old_role:
4777
      # Tell the node to demote itself, if no longer MC and not offline
4778
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4779
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4780
        if msg:
4781
          self.LogWarning("Node failed to demote itself: %s", msg)
4782

    
4783
      new_flags = self._R2F[new_role]
4784
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4785
        if of != nf:
4786
          result.append((desc, str(nf)))
4787
      (node.master_candidate, node.drained, node.offline) = new_flags
4788

    
4789
      # we locked all nodes, we adjust the CP before updating this node
4790
      if self.lock_all:
4791
        _AdjustCandidatePool(self, [node.name])
4792

    
4793
    if self.op.secondary_ip:
4794
      node.secondary_ip = self.op.secondary_ip
4795
      result.append(("secondary_ip", self.op.secondary_ip))
4796

    
4797
    # this will trigger configuration file update, if needed
4798
    self.cfg.Update(node, feedback_fn)
4799

    
4800
    # this will trigger job queue propagation or cleanup if the mc
4801
    # flag changed
4802
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4803
      self.context.ReaddNode(node)
4804

    
4805
    return result
4806

    
4807

    
4808
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
4841
  """Query cluster configuration.
4842

4843
  """
4844
  REQ_BGL = False
4845

    
4846
  def ExpandNames(self):
4847
    self.needed_locks = {}
4848

    
4849
  def Exec(self, feedback_fn):
4850
    """Return cluster config.
4851

4852
    """
4853
    cluster = self.cfg.GetClusterInfo()
4854
    os_hvp = {}
4855

    
4856
    # Filter just for enabled hypervisors
4857
    for os_name, hv_dict in cluster.os_hvp.items():
4858
      os_hvp[os_name] = {}
4859
      for hv_name, hv_params in hv_dict.items():
4860
        if hv_name in cluster.enabled_hypervisors:
4861
          os_hvp[os_name][hv_name] = hv_params
4862

    
4863
    # Convert ip_family to ip_version
4864
    primary_ip_version = constants.IP4_VERSION
4865
    if cluster.primary_ip_family == netutils.IP6Address.family:
4866
      primary_ip_version = constants.IP6_VERSION
4867

    
4868
    result = {
4869
      "software_version": constants.RELEASE_VERSION,
4870
      "protocol_version": constants.PROTOCOL_VERSION,
4871
      "config_version": constants.CONFIG_VERSION,
4872
      "os_api_version": max(constants.OS_API_VERSIONS),
4873
      "export_version": constants.EXPORT_VERSION,
4874
      "architecture": (platform.architecture()[0], platform.machine()),
4875
      "name": cluster.cluster_name,
4876
      "master": cluster.master_node,
4877
      "default_hypervisor": cluster.enabled_hypervisors[0],
4878
      "enabled_hypervisors": cluster.enabled_hypervisors,
4879
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4880
                        for hypervisor_name in cluster.enabled_hypervisors]),
4881
      "os_hvp": os_hvp,
4882
      "beparams": cluster.beparams,
4883
      "osparams": cluster.osparams,
4884
      "nicparams": cluster.nicparams,
4885
      "ndparams": cluster.ndparams,
4886
      "candidate_pool_size": cluster.candidate_pool_size,
4887
      "master_netdev": cluster.master_netdev,
4888
      "volume_group_name": cluster.volume_group_name,
4889
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4890
      "file_storage_dir": cluster.file_storage_dir,
4891
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
4892
      "maintain_node_health": cluster.maintain_node_health,
4893
      "ctime": cluster.ctime,
4894
      "mtime": cluster.mtime,
4895
      "uuid": cluster.uuid,
4896
      "tags": list(cluster.GetTags()),
4897
      "uid_pool": cluster.uid_pool,
4898
      "default_iallocator": cluster.default_iallocator,
4899
      "reserved_lvs": cluster.reserved_lvs,
4900
      "primary_ip_version": primary_ip_version,
4901
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4902
      "hidden_os": cluster.hidden_os,
4903
      "blacklisted_os": cluster.blacklisted_os,
4904
      }
4905

    
4906
    return result
4907

    
4908

    
4909
class LUClusterConfigQuery(NoHooksLU):
4910
  """Return configuration values.
4911

4912
  """
4913
  REQ_BGL = False
4914
  _FIELDS_DYNAMIC = utils.FieldSet()
4915
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4916
                                  "watcher_pause", "volume_group_name")
4917

    
4918
  def CheckArguments(self):
4919
    _CheckOutputFields(static=self._FIELDS_STATIC,
4920
                       dynamic=self._FIELDS_DYNAMIC,
4921
                       selected=self.op.output_fields)
4922

    
4923
  def ExpandNames(self):
4924
    self.needed_locks = {}
4925

    
4926
  def Exec(self, feedback_fn):
4927
    """Dump a representation of the cluster config to the standard output.
4928

4929
    """
4930
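    # Illustrative call/result (values made up): requesting
    # output_fields=["cluster_name", "volume_group_name"] returns the
    # entries in that same order, e.g. ["cluster1.example.com", "xenvg"].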
    values = []
4931
    for field in self.op.output_fields:
4932
      if field == "cluster_name":
4933
        entry = self.cfg.GetClusterName()
4934
      elif field == "master_node":
4935
        entry = self.cfg.GetMasterNode()
4936
      elif field == "drain_flag":
4937
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4938
      elif field == "watcher_pause":
4939
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4940
      elif field == "volume_group_name":
4941
        entry = self.cfg.GetVGName()
4942
      else:
4943
        raise errors.ParameterError(field)
4944
      values.append(entry)
4945
    return values
4946

    
4947

    
4948
class LUInstanceActivateDisks(NoHooksLU):
4949
  """Bring up an instance's disks.
4950

4951
  """
4952
  REQ_BGL = False
4953

    
4954
  def ExpandNames(self):
4955
    self._ExpandAndLockInstance()
4956
    self.needed_locks[locking.LEVEL_NODE] = []
4957
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4958

    
4959
  def DeclareLocks(self, level):
4960
    if level == locking.LEVEL_NODE:
4961
      self._LockInstancesNodes()
4962

    
4963
  def CheckPrereq(self):
4964
    """Check prerequisites.
4965

4966
    This checks that the instance is in the cluster.
4967

4968
    """
4969
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4970
    assert self.instance is not None, \
4971
      "Cannot retrieve locked instance %s" % self.op.instance_name
4972
    _CheckNodeOnline(self, self.instance.primary_node)
4973

    
4974
  def Exec(self, feedback_fn):
4975
    """Activate the disks.
4976

4977
    """
4978
    disks_ok, disks_info = \
4979
              _AssembleInstanceDisks(self, self.instance,
4980
                                     ignore_size=self.op.ignore_size)
4981
    if not disks_ok:
4982
      raise errors.OpExecError("Cannot activate block devices")
4983

    
4984
    return disks_info
4985

    
4986

    
4987
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4988
                           ignore_size=False):
4989
  """Prepare the block devices for an instance.
4990

4991
  This sets up the block devices on all nodes.
4992

4993
  @type lu: L{LogicalUnit}
4994
  @param lu: the logical unit on whose behalf we execute
4995
  @type instance: L{objects.Instance}
4996
  @param instance: the instance for whose disks we assemble
4997
  @type disks: list of L{objects.Disk} or None
4998
  @param disks: which disks to assemble (or all, if None)
4999
  @type ignore_secondaries: boolean
5000
  @param ignore_secondaries: if true, errors on secondary nodes
5001
      won't result in an error return from the function
5002
  @type ignore_size: boolean
5003
  @param ignore_size: if true, the current known size of the disk
5004
      will not be used during the disk activation, useful for cases
5005
      when the size is wrong
5006
  @return: False if the operation failed, otherwise a list of
5007
      (host, instance_visible_name, node_visible_name)
5008
      with the mapping from node devices to instance devices
5009

5010
  """
5011
  device_info = []
5012
  disks_ok = True
5013
  iname = instance.name
5014
  disks = _ExpandCheckDisks(instance, disks)
5015

    
5016
  # With the two passes mechanism we try to reduce the window of
5017
  # opportunity for the race condition of switching DRBD to primary
5018
  # before handshaking occurred, but we do not eliminate it
5019

    
5020
  # The proper fix would be to wait (with some limits) until the
5021
  # connection has been made and drbd transitions from WFConnection
5022
  # into any other network-connected state (Connected, SyncTarget,
5023
  # SyncSource, etc.)
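  # In outline: pass 1 below calls call_blockdev_assemble(node, disk, iname,
  # False, idx) for every node in each disk's node tree, bringing everything
  # up in secondary mode; pass 2 repeats the call with is_primary=True but
  # only on instance.primary_node, by which time the secondaries have had a
  # chance to start their side of the DRBD handshake.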
5024

    
5025
  # 1st pass, assemble on all nodes in secondary mode
5026
  for idx, inst_disk in enumerate(disks):
5027
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5028
      if ignore_size:
5029
        node_disk = node_disk.Copy()
5030
        node_disk.UnsetSize()
5031
      lu.cfg.SetDiskID(node_disk, node)
5032
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5033
      msg = result.fail_msg
5034
      if msg:
5035
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5036
                           " (is_primary=False, pass=1): %s",
5037
                           inst_disk.iv_name, node, msg)
5038
        if not ignore_secondaries:
5039
          disks_ok = False
5040

    
5041
  # FIXME: race condition on drbd migration to primary
5042

    
5043
  # 2nd pass, do only the primary node
5044
  for idx, inst_disk in enumerate(disks):
5045
    dev_path = None
5046

    
5047
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5048
      if node != instance.primary_node:
5049
        continue
5050
      if ignore_size:
5051
        node_disk = node_disk.Copy()
5052
        node_disk.UnsetSize()
5053
      lu.cfg.SetDiskID(node_disk, node)
5054
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5055
      msg = result.fail_msg
5056
      if msg:
5057
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5058
                           " (is_primary=True, pass=2): %s",
5059
                           inst_disk.iv_name, node, msg)
5060
        disks_ok = False
5061
      else:
5062
        dev_path = result.payload
5063

    
5064
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5065

    
5066
  # leave the disks configured for the primary node
5067
  # this is a workaround that would be fixed better by
5068
  # improving the logical/physical id handling
5069
  for disk in disks:
5070
    lu.cfg.SetDiskID(disk, instance.primary_node)
5071

    
5072
  return disks_ok, device_info
5073

    
5074

    
5075
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
5091
  """Shutdown an instance's disks.
5092

5093
  """
5094
  REQ_BGL = False
5095

    
5096
  def ExpandNames(self):
5097
    self._ExpandAndLockInstance()
5098
    self.needed_locks[locking.LEVEL_NODE] = []
5099
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5100

    
5101
  def DeclareLocks(self, level):
5102
    if level == locking.LEVEL_NODE:
5103
      self._LockInstancesNodes()
5104

    
5105
  def CheckPrereq(self):
5106
    """Check prerequisites.
5107

5108
    This checks that the instance is in the cluster.
5109

5110
    """
5111
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5112
    assert self.instance is not None, \
5113
      "Cannot retrieve locked instance %s" % self.op.instance_name
5114

    
5115
  def Exec(self, feedback_fn):
5116
    """Deactivate the disks
5117

5118
    """
5119
    instance = self.instance
5120
    if self.op.force:
5121
      _ShutdownInstanceDisks(self, instance)
5122
    else:
5123
      _SafeShutdownInstanceDisks(self, instance)
5124

    
5125

    
5126
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
5138
  """Return the instance disks selected by the disks list
5139

5140
  @type disks: list of L{objects.Disk} or None
5141
  @param disks: selected disks
5142
  @rtype: list of L{objects.Disk}
5143
  @return: selected instance disks to act on
5144

5145
  """
5146
  if disks is None:
5147
    return instance.disks
5148
  else:
5149
    if not set(disks).issubset(instance.disks):
5150
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5151
                                   " target instance")
5152
    return disks
5153

    
5154

    
5155
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5156
  """Shutdown block devices of an instance.
5157

5158
  This does the shutdown on all nodes of the instance.
5159

5160
  If ignore_primary is false, errors on the primary node are not
  ignored: they make the function report failure.
5162

5163
  """
5164
  all_result = True
5165
  disks = _ExpandCheckDisks(instance, disks)
5166

    
5167
  for disk in disks:
5168
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5169
      lu.cfg.SetDiskID(top_disk, node)
5170
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5171
      msg = result.fail_msg
5172
      if msg:
5173
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5174
                      disk.iv_name, node, msg)
5175
        if ((node == instance.primary_node and not ignore_primary) or
5176
            (node != instance.primary_node and not result.offline)):
5177
          all_result = False
5178
  return all_result
5179

    
5180

    
5181
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5182
  """Checks if a node has enough free memory.
5183

5184
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5188

5189
  @type lu: C{LogicalUnit}
5190
  @param lu: a logical unit from which we get configuration data
5191
  @type node: C{str}
5192
  @param node: the node to check
5193
  @type reason: C{str}
5194
  @param reason: string to use in the error message
5195
  @type requested: C{int}
5196
  @param requested: the amount of memory in MiB to check for
5197
  @type hypervisor_name: C{str}
5198
  @param hypervisor_name: the hypervisor to ask for memory stats
5199
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5200
      we cannot check the node
5201

5202
  """
5203
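  # Illustrative call, mirroring the use in LUInstanceStartup.CheckPrereq
  # further below:
  #   _CheckNodeFreeMemory(self, instance.primary_node,
  #                        "starting instance %s" % instance.name,
  #                        bep[constants.BE_MEMORY], instance.hypervisor)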
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5204
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5205
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5206
  free_mem = nodeinfo[node].payload.get('memory_free', None)
5207
  if not isinstance(free_mem, int):
5208
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5209
                               " was '%s'" % (node, free_mem),
5210
                               errors.ECODE_ENVIRON)
5211
  if requested > free_mem:
5212
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5213
                               " needed %s MiB, available %s MiB" %
5214
                               (node, reason, requested, free_mem),
5215
                               errors.ECODE_NORES)
5216

    
5217

    
5218
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5219
  """Checks if nodes have enough free disk space in the all VGs.
5220

5221
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5225

5226
  @type lu: C{LogicalUnit}
5227
  @param lu: a logical unit from which we get configuration data
5228
  @type nodenames: C{list}
5229
  @param nodenames: the list of node names to check
5230
  @type req_sizes: C{dict}
5231
  @param req_sizes: the hash of vg and corresponding amount of disk in
5232
      MiB to check for
5233
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5234
      or we cannot check the node
5235

5236
  """
5237
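  # req_sizes maps volume group name to the space required in it; for
  # illustration only, {"xenvg": 10240, "ssdvg": 2048} would demand 10 GiB
  # free in "xenvg" and 2 GiB free in "ssdvg" on every node in nodenames.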
  for vg, req_size in req_sizes.items():
5238
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5239

    
5240

    
5241
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5242
  """Checks if nodes have enough free disk space in the specified VG.
5243

5244
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5248

5249
  @type lu: C{LogicalUnit}
5250
  @param lu: a logical unit from which we get configuration data
5251
  @type nodenames: C{list}
5252
  @param nodenames: the list of node names to check
5253
  @type vg: C{str}
5254
  @param vg: the volume group to check
5255
  @type requested: C{int}
5256
  @param requested: the amount of disk in MiB to check for
5257
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5258
      or we cannot check the node
5259

5260
  """
5261
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5262
  for node in nodenames:
5263
    info = nodeinfo[node]
5264
    info.Raise("Cannot get current information from node %s" % node,
5265
               prereq=True, ecode=errors.ECODE_ENVIRON)
5266
    vg_free = info.payload.get("vg_free", None)
5267
    if not isinstance(vg_free, int):
5268
      raise errors.OpPrereqError("Can't compute free disk space on node"
5269
                                 " %s for vg %s, result was '%s'" %
5270
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5271
    if requested > vg_free:
5272
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5273
                                 " vg %s: required %d MiB, available %d MiB" %
5274
                                 (node, vg, requested, vg_free),
5275
                                 errors.ECODE_NORES)
5276

    
5277

    
5278
class LUInstanceStartup(LogicalUnit):
5279
  """Starts an instance.
5280

5281
  """
5282
  HPATH = "instance-start"
5283
  HTYPE = constants.HTYPE_INSTANCE
5284
  REQ_BGL = False
5285

    
5286
  def CheckArguments(self):
5287
    # extra beparams
5288
    if self.op.beparams:
5289
      # fill the beparams dict
5290
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5291

    
5292
  def ExpandNames(self):
5293
    self._ExpandAndLockInstance()
5294

    
5295
  def BuildHooksEnv(self):
5296
    """Build hooks env.
5297

5298
    This runs on master, primary and secondary nodes of the instance.
5299

5300
    """
5301
    env = {
5302
      "FORCE": self.op.force,
5303
      }
5304

    
5305
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5306

    
5307
    return env
5308

    
5309
  def BuildHooksNodes(self):
5310
    """Build hooks nodes.
5311

5312
    """
5313
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5314
    return (nl, nl)
5315

    
5316
  def CheckPrereq(self):
5317
    """Check prerequisites.
5318

5319
    This checks that the instance is in the cluster.
5320

5321
    """
5322
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5323
    assert self.instance is not None, \
5324
      "Cannot retrieve locked instance %s" % self.op.instance_name
5325

    
5326
    # extra hvparams
5327
    if self.op.hvparams:
5328
      # check hypervisor parameter syntax (locally)
5329
      cluster = self.cfg.GetClusterInfo()
5330
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5331
      filled_hvp = cluster.FillHV(instance)
5332
      filled_hvp.update(self.op.hvparams)
5333
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5334
      hv_type.CheckParameterSyntax(filled_hvp)
5335
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5336

    
5337
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5338

    
5339
    if self.primary_offline and self.op.ignore_offline_nodes:
5340
      self.proc.LogWarning("Ignoring offline primary node")
5341

    
5342
      if self.op.hvparams or self.op.beparams:
5343
        self.proc.LogWarning("Overridden parameters are ignored")
5344
    else:
5345
      _CheckNodeOnline(self, instance.primary_node)
5346

    
5347
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5348

    
5349
      # check bridges existence
5350
      _CheckInstanceBridgesExist(self, instance)
5351

    
5352
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5353
                                                instance.name,
5354
                                                instance.hypervisor)
5355
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5356
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5357
      if not remote_info.payload: # not running already
5358
        _CheckNodeFreeMemory(self, instance.primary_node,
5359
                             "starting instance %s" % instance.name,
5360
                             bep[constants.BE_MEMORY], instance.hypervisor)
5361

    
5362
  def Exec(self, feedback_fn):
5363
    """Start the instance.
5364

5365
    """
5366
    instance = self.instance
5367
    force = self.op.force
5368

    
5369
    self.cfg.MarkInstanceUp(instance.name)
5370

    
5371
    if self.primary_offline:
5372
      assert self.op.ignore_offline_nodes
5373
      self.proc.LogInfo("Primary node offline, marked instance as started")
5374
    else:
5375
      node_current = instance.primary_node
5376

    
5377
      _StartInstanceDisks(self, instance, force)
5378

    
5379
      result = self.rpc.call_instance_start(node_current, instance,
5380
                                            self.op.hvparams, self.op.beparams)
5381
      msg = result.fail_msg
5382
      if msg:
5383
        _ShutdownInstanceDisks(self, instance)
5384
        raise errors.OpExecError("Could not start instance: %s" % msg)
5385

    
5386

    
5387
class LUInstanceReboot(LogicalUnit):
5388
  """Reboot an instance.
5389

5390
  """
5391
  HPATH = "instance-reboot"
5392
  HTYPE = constants.HTYPE_INSTANCE
5393
  REQ_BGL = False
5394

    
5395
  def ExpandNames(self):
5396
    self._ExpandAndLockInstance()
5397

    
5398
  def BuildHooksEnv(self):
5399
    """Build hooks env.
5400

5401
    This runs on master, primary and secondary nodes of the instance.
5402

5403
    """
5404
    env = {
5405
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5406
      "REBOOT_TYPE": self.op.reboot_type,
5407
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5408
      }
5409

    
5410
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5411

    
5412
    return env
5413

    
5414
  def BuildHooksNodes(self):
5415
    """Build hooks nodes.
5416

5417
    """
5418
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5419
    return (nl, nl)
5420

    
5421
  def CheckPrereq(self):
5422
    """Check prerequisites.
5423

5424
    This checks that the instance is in the cluster.
5425

5426
    """
5427
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5428
    assert self.instance is not None, \
5429
      "Cannot retrieve locked instance %s" % self.op.instance_name
5430

    
5431
    _CheckNodeOnline(self, instance.primary_node)
5432

    
5433
    # check bridges existence
5434
    _CheckInstanceBridgesExist(self, instance)
5435

    
5436
  def Exec(self, feedback_fn):
5437
    """Reboot the instance.
5438

5439
    """
5440
    instance = self.instance
5441
    ignore_secondaries = self.op.ignore_secondaries
5442
    reboot_type = self.op.reboot_type
5443

    
5444
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5445
                                              instance.name,
5446
                                              instance.hypervisor)
5447
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5448
    instance_running = bool(remote_info.payload)
5449

    
5450
    node_current = instance.primary_node
5451

    
5452
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5453
                                            constants.INSTANCE_REBOOT_HARD]:
5454
      for disk in instance.disks:
5455
        self.cfg.SetDiskID(disk, node_current)
5456
      result = self.rpc.call_instance_reboot(node_current, instance,
5457
                                             reboot_type,
5458
                                             self.op.shutdown_timeout)
5459
      result.Raise("Could not reboot instance")
5460
    else:
5461
      if instance_running:
5462
        result = self.rpc.call_instance_shutdown(node_current, instance,
5463
                                                 self.op.shutdown_timeout)
5464
        result.Raise("Could not shutdown instance for full reboot")
5465
        _ShutdownInstanceDisks(self, instance)
5466
      else:
5467
        self.LogInfo("Instance %s was already stopped, starting now",
5468
                     instance.name)
5469
      _StartInstanceDisks(self, instance, ignore_secondaries)
5470
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5471
      msg = result.fail_msg
5472
      if msg:
5473
        _ShutdownInstanceDisks(self, instance)
5474
        raise errors.OpExecError("Could not start instance for"
5475
                                 " full reboot: %s" % msg)
5476

    
5477
    self.cfg.MarkInstanceUp(instance.name)
5478

    
5479

    
5480
class LUInstanceShutdown(LogicalUnit):
5481
  """Shutdown an instance.
5482

5483
  """
5484
  HPATH = "instance-stop"
5485
  HTYPE = constants.HTYPE_INSTANCE
5486
  REQ_BGL = False
5487

    
5488
  def ExpandNames(self):
5489
    self._ExpandAndLockInstance()
5490

    
5491
  def BuildHooksEnv(self):
5492
    """Build hooks env.
5493

5494
    This runs on master, primary and secondary nodes of the instance.
5495

5496
    """
5497
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5498
    env["TIMEOUT"] = self.op.timeout
5499
    return env
5500

    
5501
  def BuildHooksNodes(self):
5502
    """Build hooks nodes.
5503

5504
    """
5505
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5506
    return (nl, nl)
5507

    
5508
  def CheckPrereq(self):
5509
    """Check prerequisites.
5510

5511
    This checks that the instance is in the cluster.
5512

5513
    """
5514
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5515
    assert self.instance is not None, \
5516
      "Cannot retrieve locked instance %s" % self.op.instance_name
5517

    
5518
    self.primary_offline = \
5519
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5520

    
5521
    if self.primary_offline and self.op.ignore_offline_nodes:
5522
      self.proc.LogWarning("Ignoring offline primary node")
5523
    else:
5524
      _CheckNodeOnline(self, self.instance.primary_node)
5525

    
5526
  def Exec(self, feedback_fn):
5527
    """Shutdown the instance.
5528

5529
    """
5530
    instance = self.instance
5531
    node_current = instance.primary_node
5532
    timeout = self.op.timeout
5533

    
5534
    self.cfg.MarkInstanceDown(instance.name)
5535

    
5536
    if self.primary_offline:
5537
      assert self.op.ignore_offline_nodes
5538
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5539
    else:
5540
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5541
      msg = result.fail_msg
5542
      if msg:
5543
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5544

    
5545
      _ShutdownInstanceDisks(self, instance)
5546

    
5547

    
5548
class LUInstanceReinstall(LogicalUnit):
5549
  """Reinstall an instance.
5550

5551
  """
5552
  HPATH = "instance-reinstall"
5553
  HTYPE = constants.HTYPE_INSTANCE
5554
  REQ_BGL = False
5555

    
5556
  def ExpandNames(self):
5557
    self._ExpandAndLockInstance()
5558

    
5559
  def BuildHooksEnv(self):
5560
    """Build hooks env.
5561

5562
    This runs on master, primary and secondary nodes of the instance.
5563

5564
    """
5565
    return _BuildInstanceHookEnvByObject(self, self.instance)
5566

    
5567
  def BuildHooksNodes(self):
5568
    """Build hooks nodes.
5569

5570
    """
5571
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5572
    return (nl, nl)
5573

    
5574
  def CheckPrereq(self):
5575
    """Check prerequisites.
5576

5577
    This checks that the instance is in the cluster and is not running.
5578

5579
    """
5580
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5581
    assert instance is not None, \
5582
      "Cannot retrieve locked instance %s" % self.op.instance_name
5583
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5584
                     " offline, cannot reinstall")
5585
    for node in instance.secondary_nodes:
5586
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5587
                       " cannot reinstall")
5588

    
5589
    if instance.disk_template == constants.DT_DISKLESS:
5590
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5591
                                 self.op.instance_name,
5592
                                 errors.ECODE_INVAL)
5593
    _CheckInstanceDown(self, instance, "cannot reinstall")
5594

    
5595
    if self.op.os_type is not None:
5596
      # OS verification
5597
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5598
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5599
      instance_os = self.op.os_type
5600
    else:
5601
      instance_os = instance.os
5602

    
5603
    nodelist = list(instance.all_nodes)
5604

    
5605
    if self.op.osparams:
5606
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5607
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5608
      self.os_inst = i_osdict # the new dict (without defaults)
5609
    else:
5610
      self.os_inst = None
5611

    
5612
    self.instance = instance
5613

    
5614
  def Exec(self, feedback_fn):
5615
    """Reinstall the instance.
5616

5617
    """
5618
    inst = self.instance
5619

    
5620
    if self.op.os_type is not None:
5621
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5622
      inst.os = self.op.os_type
5623
      # Write to configuration
5624
      self.cfg.Update(inst, feedback_fn)
5625

    
5626
    _StartInstanceDisks(self, inst, None)
5627
    try:
5628
      feedback_fn("Running the instance OS create scripts...")
5629
      # FIXME: pass debug option from opcode to backend
5630
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5631
                                             self.op.debug_level,
5632
                                             osparams=self.os_inst)
5633
      result.Raise("Could not install OS for instance %s on node %s" %
5634
                   (inst.name, inst.primary_node))
5635
    finally:
5636
      _ShutdownInstanceDisks(self, inst)
5637

    
5638

    
5639
class LUInstanceRecreateDisks(LogicalUnit):
5640
  """Recreate an instance's missing disks.
5641

5642
  """
5643
  HPATH = "instance-recreate-disks"
5644
  HTYPE = constants.HTYPE_INSTANCE
5645
  REQ_BGL = False
5646

    
5647
  def ExpandNames(self):
5648
    self._ExpandAndLockInstance()
5649

    
5650
  def BuildHooksEnv(self):
5651
    """Build hooks env.
5652

5653
    This runs on master, primary and secondary nodes of the instance.
5654

5655
    """
5656
    return _BuildInstanceHookEnvByObject(self, self.instance)
5657

    
5658
  def BuildHooksNodes(self):
5659
    """Build hooks nodes.
5660

5661
    """
5662
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5663
    return (nl, nl)
5664

    
5665
  def CheckPrereq(self):
5666
    """Check prerequisites.
5667

5668
    This checks that the instance is in the cluster and is not running.
5669

5670
    """
5671
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5672
    assert instance is not None, \
5673
      "Cannot retrieve locked instance %s" % self.op.instance_name
5674
    _CheckNodeOnline(self, instance.primary_node)
5675

    
5676
    if instance.disk_template == constants.DT_DISKLESS:
5677
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5678
                                 self.op.instance_name, errors.ECODE_INVAL)
5679
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5680

    
5681
    if not self.op.disks:
5682
      self.op.disks = range(len(instance.disks))
5683
    else:
5684
      for idx in self.op.disks:
5685
        if idx >= len(instance.disks):
5686
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5687
                                     errors.ECODE_INVAL)
5688

    
5689
    self.instance = instance
5690

    
5691
  def Exec(self, feedback_fn):
5692
    """Recreate the disks.
5693

5694
    """
5695
    to_skip = []
5696
    for idx, _ in enumerate(self.instance.disks):
5697
      if idx not in self.op.disks: # disk idx has not been passed in
5698
        to_skip.append(idx)
5699
        continue
5700

    
5701
    _CreateDisks(self, self.instance, to_skip=to_skip)
5702

    
5703

    
5704
class LUInstanceRename(LogicalUnit):
5705
  """Rename an instance.
5706

5707
  """
5708
  HPATH = "instance-rename"
5709
  HTYPE = constants.HTYPE_INSTANCE
5710

    
5711
  def CheckArguments(self):
5712
    """Check arguments.
5713

5714
    """
5715
    if self.op.ip_check and not self.op.name_check:
5716
      # TODO: make the ip check more flexible and not depend on the name check
5717
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5718
                                 errors.ECODE_INVAL)
5719

    
5720
  def BuildHooksEnv(self):
5721
    """Build hooks env.
5722

5723
    This runs on master, primary and secondary nodes of the instance.
5724

5725
    """
5726
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5727
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5728
    return env
5729

    
5730
  def BuildHooksNodes(self):
5731
    """Build hooks nodes.
5732

5733
    """
5734
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5735
    return (nl, nl)
5736

    
5737
  def CheckPrereq(self):
5738
    """Check prerequisites.
5739

5740
    This checks that the instance is in the cluster and is not running.
5741

5742
    """
5743
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5744
                                                self.op.instance_name)
5745
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5746
    assert instance is not None
5747
    _CheckNodeOnline(self, instance.primary_node)
5748
    _CheckInstanceDown(self, instance, "cannot rename")
5749
    self.instance = instance
5750

    
5751
    new_name = self.op.new_name
5752
    if self.op.name_check:
5753
      hostname = netutils.GetHostname(name=new_name)
5754
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5755
                   hostname.name)
5756
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5757
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5758
                                    " same as given hostname '%s'") %
5759
                                    (hostname.name, self.op.new_name),
5760
                                    errors.ECODE_INVAL)
5761
      new_name = self.op.new_name = hostname.name
5762
      if (self.op.ip_check and
5763
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5764
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5765
                                   (hostname.ip, new_name),
5766
                                   errors.ECODE_NOTUNIQUE)
5767

    
5768
    instance_list = self.cfg.GetInstanceList()
5769
    if new_name in instance_list and new_name != instance.name:
5770
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5771
                                 new_name, errors.ECODE_EXISTS)
5772

    
5773
  def Exec(self, feedback_fn):
5774
    """Rename the instance.
5775

5776
    """
5777
    inst = self.instance
5778
    old_name = inst.name
5779

    
5780
    rename_file_storage = False
5781
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5782
        self.op.new_name != inst.name):
5783
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5784
      rename_file_storage = True
5785

    
5786
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5787
    # Change the instance lock. This is definitely safe while we hold the BGL
5788
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5789
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5790

    
5791
    # re-read the instance from the configuration after rename
5792
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5793

    
5794
    if rename_file_storage:
5795
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5796
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5797
                                                     old_file_storage_dir,
5798
                                                     new_file_storage_dir)
5799
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5800
                   " (but the instance has been renamed in Ganeti)" %
5801
                   (inst.primary_node, old_file_storage_dir,
5802
                    new_file_storage_dir))
5803

    
5804
    _StartInstanceDisks(self, inst, None)
5805
    try:
5806
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5807
                                                 old_name, self.op.debug_level)
5808
      msg = result.fail_msg
5809
      if msg:
5810
        msg = ("Could not run OS rename script for instance %s on node %s"
5811
               " (but the instance has been renamed in Ganeti): %s" %
5812
               (inst.name, inst.primary_node, msg))
5813
        self.proc.LogWarning(msg)
5814
    finally:
5815
      _ShutdownInstanceDisks(self, inst)
5816

    
5817
    return inst.name
5818

    
5819

    
5820
class LUInstanceRemove(LogicalUnit):
5821
  """Remove an instance.
5822

5823
  """
5824
  HPATH = "instance-remove"
5825
  HTYPE = constants.HTYPE_INSTANCE
5826
  REQ_BGL = False
5827

    
5828
  def ExpandNames(self):
5829
    self._ExpandAndLockInstance()
5830
    self.needed_locks[locking.LEVEL_NODE] = []
5831
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5832

    
5833
  def DeclareLocks(self, level):
5834
    if level == locking.LEVEL_NODE:
5835
      self._LockInstancesNodes()
5836

    
5837
  def BuildHooksEnv(self):
5838
    """Build hooks env.
5839

5840
    This runs on master, primary and secondary nodes of the instance.
5841

5842
    """
5843
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5844
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5845
    return env
5846

    
5847
  def BuildHooksNodes(self):
5848
    """Build hooks nodes.
5849

5850
    """
5851
    nl = [self.cfg.GetMasterNode()]
5852
    nl_post = list(self.instance.all_nodes) + nl
5853
    return (nl, nl_post)
5854

    
5855
  def CheckPrereq(self):
5856
    """Check prerequisites.
5857

5858
    This checks that the instance is in the cluster.
5859

5860
    """
5861
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5862
    assert self.instance is not None, \
5863
      "Cannot retrieve locked instance %s" % self.op.instance_name
5864

    
5865
  def Exec(self, feedback_fn):
5866
    """Remove the instance.
5867

5868
    """
5869
    instance = self.instance
5870
    logging.info("Shutting down instance %s on node %s",
5871
                 instance.name, instance.primary_node)
5872

    
5873
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5874
                                             self.op.shutdown_timeout)
5875
    msg = result.fail_msg
5876
    if msg:
5877
      if self.op.ignore_failures:
5878
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5879
      else:
5880
        raise errors.OpExecError("Could not shutdown instance %s on"
5881
                                 " node %s: %s" %
5882
                                 (instance.name, instance.primary_node, msg))
5883

    
5884
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5885

    
5886

    
5887
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5888
  """Utility function to remove an instance.
5889

5890
  """
5891
  logging.info("Removing block devices for instance %s", instance.name)
5892

    
5893
  if not _RemoveDisks(lu, instance):
5894
    if not ignore_failures:
5895
      raise errors.OpExecError("Can't remove instance's disks")
5896
    feedback_fn("Warning: can't remove instance's disks")
5897

    
5898
  logging.info("Removing instance %s out of cluster config", instance.name)
5899

    
5900
  lu.cfg.RemoveInstance(instance.name)
5901

    
5902
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5903
    "Instance lock removal conflict"
5904

    
5905
  # Remove lock for the instance
5906
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5907

    
5908

    
5909
class LUInstanceQuery(NoHooksLU):
5910
  """Logical unit for querying instances.
5911

5912
  """
5913
  # pylint: disable-msg=W0142
5914
  REQ_BGL = False
5915

    
5916
  def CheckArguments(self):
5917
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5918
                             self.op.output_fields, self.op.use_locking)
5919

    
5920
  def ExpandNames(self):
5921
    self.iq.ExpandNames(self)
5922

    
5923
  def DeclareLocks(self, level):
5924
    self.iq.DeclareLocks(self, level)
5925

    
5926
  def Exec(self, feedback_fn):
5927
    return self.iq.OldStyleQuery(self)
5928

    
5929

    
5930
class LUInstanceFailover(LogicalUnit):
5931
  """Failover an instance.
5932

5933
  """
5934
  HPATH = "instance-failover"
5935
  HTYPE = constants.HTYPE_INSTANCE
5936
  REQ_BGL = False
5937

    
5938
  def CheckArguments(self):
5939
    """Check the arguments.
5940

5941
    """
5942
    self.iallocator = getattr(self.op, "iallocator", None)
5943
    self.target_node = getattr(self.op, "target_node", None)
5944

    
5945
  def ExpandNames(self):
5946
    self._ExpandAndLockInstance()
5947

    
5948
    if self.op.target_node is not None:
5949
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5950

    
5951
    self.needed_locks[locking.LEVEL_NODE] = []
5952
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5953

    
5954
    ignore_consistency = self.op.ignore_consistency
5955
    shutdown_timeout = self.op.shutdown_timeout
5956
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5957
                                       cleanup=False,
5958
                                       iallocator=self.op.iallocator,
5959
                                       target_node=self.op.target_node,
5960
                                       failover=True,
5961
                                       ignore_consistency=ignore_consistency,
5962
                                       shutdown_timeout=shutdown_timeout)
5963
    self.tasklets = [self._migrater]
5964

    
5965
  def DeclareLocks(self, level):
5966
    if level == locking.LEVEL_NODE:
5967
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5968
      if instance.disk_template in constants.DTS_EXT_MIRROR:
5969
        if self.op.target_node is None:
5970
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5971
        else:
5972
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5973
                                                   self.op.target_node]
5974
        del self.recalculate_locks[locking.LEVEL_NODE]
5975
      else:
5976
        self._LockInstancesNodes()
5977

    
5978
  def BuildHooksEnv(self):
5979
    """Build hooks env.
5980

5981
    This runs on master, primary and secondary nodes of the instance.
5982

5983
    """
5984
    instance = self._migrater.instance
5985
    source_node = instance.primary_node
5986
    target_node = self._migrater.target_node
5987
    env = {
5988
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5989
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5990
      "OLD_PRIMARY": source_node,
5991
      "NEW_PRIMARY": target_node,
5992
      }
5993

    
5994
    if instance.disk_template in constants.DTS_INT_MIRROR:
5995
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
5996
      env["NEW_SECONDARY"] = source_node
5997
    else:
5998
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
5999

    
6000
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6001

    
6002
    return env
6003

    
6004
  def BuildHooksNodes(self):
6005
    """Build hooks nodes.
6006

6007
    """
6008
    instance = self._migrater.instance
6009
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6010
    return (nl, nl + [instance.primary_node])
6011

    
6012

    
6013
class LUInstanceMigrate(LogicalUnit):
6014
  """Migrate an instance.
6015

6016
  This is migration without shutting down, compared to the failover,
6017
  which is done with shutdown.
6018

6019
  """
6020
  HPATH = "instance-migrate"
6021
  HTYPE = constants.HTYPE_INSTANCE
6022
  REQ_BGL = False
6023

    
6024
  def ExpandNames(self):
6025
    self._ExpandAndLockInstance()
6026

    
6027
    if self.op.target_node is not None:
6028
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6029

    
6030
    self.needed_locks[locking.LEVEL_NODE] = []
6031
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6032

    
6033
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6034
                                       cleanup=self.op.cleanup,
6035
                                       iallocator=self.op.iallocator,
6036
                                       target_node=self.op.target_node,
6037
                                       failover=False,
6038
                                       fallback=self.op.allow_failover)
6039
    self.tasklets = [self._migrater]
6040

    
6041
  def DeclareLocks(self, level):
6042
    if level == locking.LEVEL_NODE:
6043
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6044
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6045
        if self.op.target_node is None:
6046
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6047
        else:
6048
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6049
                                                   self.op.target_node]
6050
        del self.recalculate_locks[locking.LEVEL_NODE]
6051
      else:
6052
        self._LockInstancesNodes()
6053

    
6054
  def BuildHooksEnv(self):
6055
    """Build hooks env.
6056

6057
    This runs on master, primary and secondary nodes of the instance.
6058

6059
    """
6060
    instance = self._migrater.instance
6061
    source_node = instance.primary_node
6062
    target_node = self._migrater.target_node
6063
    env = _BuildInstanceHookEnvByObject(self, instance)
6064
    env.update({
6065
      "MIGRATE_LIVE": self._migrater.live,
6066
      "MIGRATE_CLEANUP": self.op.cleanup,
6067
      "OLD_PRIMARY": source_node,
6068
      "NEW_PRIMARY": target_node,
6069
      })
6070

    
6071
    if instance.disk_template in constants.DTS_INT_MIRROR:
6072
      env["OLD_SECONDARY"] = target_node
6073
      env["NEW_SECONDARY"] = source_node
6074
    else:
6075
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6076

    
6077
    return env
6078

    
6079
  def BuildHooksNodes(self):
6080
    """Build hooks nodes.
6081

6082
    """
6083
    instance = self._migrater.instance
6084
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6085
    return (nl, nl + [instance.primary_node])
6086

    
6087

    
6088
class LUInstanceMove(LogicalUnit):
6089
  """Move an instance by data-copying.
6090

6091
  """
6092
  HPATH = "instance-move"
6093
  HTYPE = constants.HTYPE_INSTANCE
6094
  REQ_BGL = False
6095

    
6096
  def ExpandNames(self):
6097
    self._ExpandAndLockInstance()
6098
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6099
    self.op.target_node = target_node
6100
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6101
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6102

    
6103
  def DeclareLocks(self, level):
6104
    if level == locking.LEVEL_NODE:
6105
      self._LockInstancesNodes(primary_only=True)
6106

    
6107
  def BuildHooksEnv(self):
6108
    """Build hooks env.
6109

6110
    This runs on master, primary and secondary nodes of the instance.
6111

6112
    """
6113
    env = {
6114
      "TARGET_NODE": self.op.target_node,
6115
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6116
      }
6117
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6118
    return env
6119

    
6120
  def BuildHooksNodes(self):
6121
    """Build hooks nodes.
6122

6123
    """
6124
    nl = [
6125
      self.cfg.GetMasterNode(),
6126
      self.instance.primary_node,
6127
      self.op.target_node,
6128
      ]
6129
    return (nl, nl)
6130

    
6131
  def CheckPrereq(self):
6132
    """Check prerequisites.
6133

6134
    This checks that the instance is in the cluster.
6135

6136
    """
6137
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6138
    assert self.instance is not None, \
6139
      "Cannot retrieve locked instance %s" % self.op.instance_name
6140

    
6141
    node = self.cfg.GetNodeInfo(self.op.target_node)
6142
    assert node is not None, \
6143
      "Cannot retrieve locked node %s" % self.op.target_node
6144

    
6145
    self.target_node = target_node = node.name
6146

    
6147
    if target_node == instance.primary_node:
6148
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6149
                                 (instance.name, target_node),
6150
                                 errors.ECODE_STATE)
6151

    
6152
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6153

    
6154
    for idx, dsk in enumerate(instance.disks):
6155
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6156
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6157
                                   " cannot copy" % idx, errors.ECODE_STATE)
6158

    
6159
    _CheckNodeOnline(self, target_node)
6160
    _CheckNodeNotDrained(self, target_node)
6161
    _CheckNodeVmCapable(self, target_node)
6162

    
6163
    if instance.admin_up:
6164
      # check memory requirements on the secondary node
6165
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6166
                           instance.name, bep[constants.BE_MEMORY],
6167
                           instance.hypervisor)
6168
    else:
6169
      self.LogInfo("Not checking memory on the secondary node as"
6170
                   " instance will not be started")
6171

    
6172
    # check bridge existance
6173
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6174

    
6175
  def Exec(self, feedback_fn):
6176
    """Move an instance.
6177

6178
    The move is done by shutting it down on its present node, copying
6179
    the data over (slow) and starting it on the new node.
6180

6181
    """
6182
    instance = self.instance
6183

    
6184
    source_node = instance.primary_node
6185
    target_node = self.target_node
6186

    
6187
    self.LogInfo("Shutting down instance %s on source node %s",
6188
                 instance.name, source_node)
6189

    
6190
    result = self.rpc.call_instance_shutdown(source_node, instance,
6191
                                             self.op.shutdown_timeout)
6192
    msg = result.fail_msg
6193
    if msg:
6194
      if self.op.ignore_consistency:
6195
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6196
                             " Proceeding anyway. Please make sure node"
6197
                             " %s is down. Error details: %s",
6198
                             instance.name, source_node, source_node, msg)
6199
      else:
6200
        raise errors.OpExecError("Could not shutdown instance %s on"
6201
                                 " node %s: %s" %
6202
                                 (instance.name, source_node, msg))
6203

    
6204
    # create the target disks
6205
    try:
6206
      _CreateDisks(self, instance, target_node=target_node)
6207
    except errors.OpExecError:
6208
      self.LogWarning("Device creation failed, reverting...")
6209
      try:
6210
        _RemoveDisks(self, instance, target_node=target_node)
6211
      finally:
6212
        self.cfg.ReleaseDRBDMinors(instance.name)
6213
        raise
6214

    
6215
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6216

    
6217
    errs = []
6218
    # activate, get path, copy the data over
6219
    for idx, disk in enumerate(instance.disks):
6220
      self.LogInfo("Copying data for disk %d", idx)
6221
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6222
                                               instance.name, True, idx)
6223
      if result.fail_msg:
6224
        self.LogWarning("Can't assemble newly created disk %d: %s",
6225
                        idx, result.fail_msg)
6226
        errs.append(result.fail_msg)
6227
        break
6228
      dev_path = result.payload
6229
      result = self.rpc.call_blockdev_export(source_node, disk,
6230
                                             target_node, dev_path,
6231
                                             cluster_name)
6232
      if result.fail_msg:
6233
        self.LogWarning("Can't copy data over for disk %d: %s",
6234
                        idx, result.fail_msg)
6235
        errs.append(result.fail_msg)
6236
        break
6237

    
6238
    if errs:
6239
      self.LogWarning("Some disks failed to copy, aborting")
6240
      try:
6241
        _RemoveDisks(self, instance, target_node=target_node)
6242
      finally:
6243
        self.cfg.ReleaseDRBDMinors(instance.name)
6244
        raise errors.OpExecError("Errors during disk copy: %s" %
6245
                                 (",".join(errs),))
6246

    
6247
    instance.primary_node = target_node
6248
    self.cfg.Update(instance, feedback_fn)
6249

    
6250
    self.LogInfo("Removing the disks on the original node")
6251
    _RemoveDisks(self, instance, target_node=source_node)
6252

    
6253
    # Only start the instance if it's marked as up
6254
    if instance.admin_up:
6255
      self.LogInfo("Starting instance %s on node %s",
6256
                   instance.name, target_node)
6257

    
6258
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6259
                                           ignore_secondaries=True)
6260
      if not disks_ok:
6261
        _ShutdownInstanceDisks(self, instance)
6262
        raise errors.OpExecError("Can't activate the instance's disks")
6263

    
6264
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6265
      msg = result.fail_msg
6266
      if msg:
6267
        _ShutdownInstanceDisks(self, instance)
6268
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6269
                                 (instance.name, target_node, msg))
6270

    
6271

    
6272
class LUNodeMigrate(LogicalUnit):
6273
  """Migrate all instances from a node.
6274

6275
  """
6276
  HPATH = "node-migrate"
6277
  HTYPE = constants.HTYPE_NODE
6278
  REQ_BGL = False
6279

    
6280
  def CheckArguments(self):
6281
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6282

    
6283
  def ExpandNames(self):
6284
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6285

    
6286
    self.needed_locks = {}
6287

    
6288
    # Create tasklets for migrating instances for all instances on this node
6289
    names = []
6290
    tasklets = []
6291

    
6292
    self.lock_all_nodes = False
6293

    
6294
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6295
      logging.debug("Migrating instance %s", inst.name)
6296
      names.append(inst.name)
6297

    
6298
      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6299
                                        iallocator=self.op.iallocator,
6300
                                        taget_node=None))
6301

    
6302
      if inst.disk_template in constants.DTS_EXT_MIRROR:
6303
        # We need to lock all nodes, as the iallocator will choose the
6304
        # destination nodes afterwards
6305
        self.lock_all_nodes = True
6306

    
6307
    self.tasklets = tasklets
6308

    
6309
    # Declare node locks
6310
    if self.lock_all_nodes:
6311
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6312
    else:
6313
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6314
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6315

    
6316
    # Declare instance locks
6317
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6318

    
6319
  def DeclareLocks(self, level):
6320
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6321
      self._LockInstancesNodes()
6322

    
6323
  def BuildHooksEnv(self):
6324
    """Build hooks env.
6325

6326
    This runs on the master, the primary and all the secondaries.
6327

6328
    """
6329
    return {
6330
      "NODE_NAME": self.op.node_name,
6331
      }
6332

    
6333
  def BuildHooksNodes(self):
6334
    """Build hooks nodes.
6335

6336
    """
6337
    nl = [self.cfg.GetMasterNode()]
6338
    return (nl, nl)
6339

    
6340

    
6341
class TLMigrateInstance(Tasklet):
6342
  """Tasklet class for instance migration.
6343

6344
  @type live: boolean
6345
  @ivar live: whether the migration will be done live or non-live;
6346
      this variable is initalized only after CheckPrereq has run
6347
  @type cleanup: boolean
6348
  @ivar cleanup: Wheater we cleanup from a failed migration
6349
  @type iallocator: string
6350
  @ivar iallocator: The iallocator used to determine target_node
6351
  @type target_node: string
6352
  @ivar target_node: If given, the target_node to reallocate the instance to
6353
  @type failover: boolean
6354
  @ivar failover: Whether operation results in failover or migration
6355
  @type fallback: boolean
6356
  @ivar fallback: Whether fallback to failover is allowed if migration not
6357
                  possible
6358
  @type ignore_consistency: boolean
6359
  @ivar ignore_consistency: Wheter we should ignore consistency between source
6360
                            and target node
6361
  @type shutdown_timeout: int
6362
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
6363

6364
  """
6365
  def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6366
               target_node=None, failover=False, fallback=False,
6367
               ignore_consistency=False,
6368
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6369
    """Initializes this class.
6370

6371
    """
6372
    Tasklet.__init__(self, lu)
6373

    
6374
    # Parameters
6375
    self.instance_name = instance_name
6376
    self.cleanup = cleanup
6377
    self.live = False # will be overridden later
6378
    self.iallocator = iallocator
6379
    self.target_node = target_node
6380
    self.failover = failover
6381
    self.fallback = fallback
6382
    self.ignore_consistency = ignore_consistency
6383
    self.shutdown_timeout = shutdown_timeout
6384

    
6385
  def CheckPrereq(self):
6386
    """Check prerequisites.
6387

6388
    This checks that the instance is in the cluster.
6389

6390
    """
6391
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6392
    instance = self.cfg.GetInstanceInfo(instance_name)
6393
    assert instance is not None
6394
    self.instance = instance
6395

    
6396
    if (not self.cleanup and not instance.admin_up and not self.failover and
6397
        self.fallback):
6398
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6399
                      " to failover")
6400
      self.failover = True
6401

    
6402
    if instance.disk_template not in constants.DTS_MIRRORED:
6403
      if self.failover:
6404
        text = "failovers"
6405
      else:
6406
        text = "migrations"
6407
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6408
                                 " %s" % (instance.disk_template, text),
6409
                                 errors.ECODE_STATE)
6410

    
6411
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6412
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6413

    
6414
      if self.iallocator:
6415
        self._RunAllocator()
6416

    
6417
      # self.target_node is already populated, either directly or by the
6418
      # iallocator run
6419
      target_node = self.target_node
6420

    
6421
      if len(self.lu.tasklets) == 1:
6422
        # It is safe to remove locks only when we're the only tasklet in the LU
6423
        nodes_keep = [instance.primary_node, self.target_node]
6424
        nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6425
                     if node not in nodes_keep]
6426
        self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6427
        self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6428

    
6429
    else:
6430
      secondary_nodes = instance.secondary_nodes
6431
      if not secondary_nodes:
6432
        raise errors.ConfigurationError("No secondary node but using"
6433
                                        " %s disk template" %
6434
                                        instance.disk_template)
6435
      target_node = secondary_nodes[0]
6436
      if self.iallocator or (self.target_node and
6437
                             self.target_node != target_node):
6438
        if self.failover:
6439
          text = "failed over"
6440
        else:
6441
          text = "migrated"
6442
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6443
                                   " be %s over to arbitrary nodes"
6444
                                   " (neither an iallocator nor a target"
6445
                                   " node can be passed)" %
6446
                                   (text, instance.disk_template),
6447
                                   errors.ECODE_INVAL)
6448

    
6449
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6450

    
6451
    # check memory requirements on the secondary node
6452
    if not self.failover or instance.admin_up:
6453
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6454
                           instance.name, i_be[constants.BE_MEMORY],
6455
                           instance.hypervisor)
6456
    else:
6457
      self.lu.LogInfo("Not checking memory on the secondary node as"
6458
                      " instance will not be started")
6459

    
6460
    # check bridge existance
6461
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6462

    
6463
    if not self.cleanup:
6464
      _CheckNodeNotDrained(self.lu, target_node)
6465
      if not self.failover:
6466
        result = self.rpc.call_instance_migratable(instance.primary_node,
6467
                                                   instance)
6468
        if result.fail_msg and self.fallback:
6469
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6470
                          " failover")
6471
          self.failover = True
6472
        else:
6473
          result.Raise("Can't migrate, please use failover",
6474
                       prereq=True, ecode=errors.ECODE_STATE)
6475

    
6476
    assert not (self.failover and self.cleanup)
6477

    
6478
  def _RunAllocator(self):
6479
    """Run the allocator based on input opcode.
6480

6481
    """
6482
    ial = IAllocator(self.cfg, self.rpc,
6483
                     mode=constants.IALLOCATOR_MODE_RELOC,
6484
                     name=self.instance_name,
6485
                     # TODO See why hail breaks with a single node below
6486
                     relocate_from=[self.instance.primary_node,
6487
                                    self.instance.primary_node],
6488
                     )
6489

    
6490
    ial.Run(self.iallocator)
6491

    
6492
    if not ial.success:
6493
      raise errors.OpPrereqError("Can't compute nodes using"
6494
                                 " iallocator '%s': %s" %
6495
                                 (self.iallocator, ial.info),
6496
                                 errors.ECODE_NORES)
6497
    if len(ial.result) != ial.required_nodes:
6498
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6499
                                 " of nodes (%s), required %s" %
6500
                                 (self.iallocator, len(ial.result),
6501
                                  ial.required_nodes), errors.ECODE_FAULT)
6502
    self.target_node = ial.result[0]
6503
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6504
                 self.instance_name, self.iallocator,
6505
                 utils.CommaJoin(ial.result))
6506

    
6507
    if not self.failover:
6508
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6509
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6510
                                   " parameters are accepted",
6511
                                   errors.ECODE_INVAL)
6512
      if self.lu.op.live is not None:
6513
        if self.lu.op.live:
6514
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6515
        else:
6516
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6517
        # reset the 'live' parameter to None so that repeated
6518
        # invocations of CheckPrereq do not raise an exception
6519
        self.lu.op.live = None
6520
      elif self.lu.op.mode is None:
6521
        # read the default value from the hypervisor
6522
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6523
                                                skip_globals=False)
6524
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6525

    
6526
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6527
    else:
6528
      # Failover is never live
6529
      self.live = False
6530

    
6531
  def _WaitUntilSync(self):
6532
    """Poll with custom rpc for disk sync.
6533

6534
    This uses our own step-based rpc call.
6535

6536
    """
6537
    self.feedback_fn("* wait until resync is done")
6538
    all_done = False
6539
    while not all_done:
6540
      all_done = True
6541
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6542
                                            self.nodes_ip,
6543
                                            self.instance.disks)
6544
      min_percent = 100
6545
      for node, nres in result.items():
6546
        nres.Raise("Cannot resync disks on node %s" % node)
6547
        node_done, node_percent = nres.payload
6548
        all_done = all_done and node_done
6549
        if node_percent is not None:
6550
          min_percent = min(min_percent, node_percent)
6551
      if not all_done:
6552
        if min_percent < 100:
6553
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6554
        time.sleep(2)
6555

    
6556
  def _EnsureSecondary(self, node):
6557
    """Demote a node to secondary.
6558

6559
    """
6560
    self.feedback_fn("* switching node %s to secondary mode" % node)
6561

    
6562
    for dev in self.instance.disks:
6563
      self.cfg.SetDiskID(dev, node)
6564

    
6565
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6566
                                          self.instance.disks)
6567
    result.Raise("Cannot change disk to secondary on node %s" % node)
6568

    
6569
  def _GoStandalone(self):
6570
    """Disconnect from the network.
6571

6572
    """
6573
    self.feedback_fn("* changing into standalone mode")
6574
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6575
                                               self.instance.disks)
6576
    for node, nres in result.items():
6577
      nres.Raise("Cannot disconnect disks node %s" % node)
6578

    
6579
  def _GoReconnect(self, multimaster):
6580
    """Reconnect to the network.
6581

6582
    """
6583
    if multimaster:
6584
      msg = "dual-master"
6585
    else:
6586
      msg = "single-master"
6587
    self.feedback_fn("* changing disks into %s mode" % msg)
6588
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6589
                                           self.instance.disks,
6590
                                           self.instance.name, multimaster)
6591
    for node, nres in result.items():
6592
      nres.Raise("Cannot change disks config on node %s" % node)
6593

    
6594
  def _ExecCleanup(self):
6595
    """Try to cleanup after a failed migration.
6596

6597
    The cleanup is done by:
6598
      - check that the instance is running only on one node
6599
        (and update the config if needed)
6600
      - change disks on its secondary node to secondary
6601
      - wait until disks are fully synchronized
6602
      - disconnect from the network
6603
      - change disks into single-master mode
6604
      - wait again until disks are fully synchronized
6605

6606
    """
6607
    instance = self.instance
6608
    target_node = self.target_node
6609
    source_node = self.source_node
6610

    
6611
    # check running on only one node
6612
    self.feedback_fn("* checking where the instance actually runs"
6613
                     " (if this hangs, the hypervisor might be in"
6614
                     " a bad state)")
6615
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6616
    for node, result in ins_l.items():
6617
      result.Raise("Can't contact node %s" % node)
6618

    
6619
    runningon_source = instance.name in ins_l[source_node].payload
6620
    runningon_target = instance.name in ins_l[target_node].payload
6621

    
6622
    if runningon_source and runningon_target:
6623
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6624
                               " or the hypervisor is confused. You will have"
6625
                               " to ensure manually that it runs only on one"
6626
                               " and restart this operation.")
6627

    
6628
    if not (runningon_source or runningon_target):
6629
      raise errors.OpExecError("Instance does not seem to be running at all."
6630
                               " In this case, it's safer to repair by"
6631
                               " running 'gnt-instance stop' to ensure disk"
6632
                               " shutdown, and then restarting it.")
6633

    
6634
    if runningon_target:
6635
      # the migration has actually succeeded, we need to update the config
6636
      self.feedback_fn("* instance running on secondary node (%s),"
6637
                       " updating config" % target_node)
6638
      instance.primary_node = target_node
6639
      self.cfg.Update(instance, self.feedback_fn)
6640
      demoted_node = source_node
6641
    else:
6642
      self.feedback_fn("* instance confirmed to be running on its"
6643
                       " primary node (%s)" % source_node)
6644
      demoted_node = target_node
6645

    
6646
    if instance.disk_template in constants.DTS_INT_MIRROR:
6647
      self._EnsureSecondary(demoted_node)
6648
      try:
6649
        self._WaitUntilSync()
6650
      except errors.OpExecError:
6651
        # we ignore here errors, since if the device is standalone, it
6652
        # won't be able to sync
6653
        pass
6654
      self._GoStandalone()
6655
      self._GoReconnect(False)
6656
      self._WaitUntilSync()
6657

    
6658
    self.feedback_fn("* done")
6659

    
6660
  def _RevertDiskStatus(self):
6661
    """Try to revert the disk status after a failed migration.
6662

6663
    """
6664
    target_node = self.target_node
6665
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6666
      return
6667

    
6668
    try:
6669
      self._EnsureSecondary(target_node)
6670
      self._GoStandalone()
6671
      self._GoReconnect(False)
6672
      self._WaitUntilSync()
6673
    except errors.OpExecError, err:
6674
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6675
                         " drives: error '%s'\n"
6676
                         "Please look and recover the instance status" %
6677
                         str(err))
6678

    
6679
  def _AbortMigration(self):
6680
    """Call the hypervisor code to abort a started migration.
6681

6682
    """
6683
    instance = self.instance
6684
    target_node = self.target_node
6685
    migration_info = self.migration_info
6686

    
6687
    abort_result = self.rpc.call_finalize_migration(target_node,
6688
                                                    instance,
6689
                                                    migration_info,
6690
                                                    False)
6691
    abort_msg = abort_result.fail_msg
6692
    if abort_msg:
6693
      logging.error("Aborting migration failed on target node %s: %s",
6694
                    target_node, abort_msg)
6695
      # Don't raise an exception here, as we stil have to try to revert the
6696
      # disk status, even if this step failed.
6697

    
6698
  def _ExecMigration(self):
6699
    """Migrate an instance.
6700

6701
    The migrate is done by:
6702
      - change the disks into dual-master mode
6703
      - wait until disks are fully synchronized again
6704
      - migrate the instance
6705
      - change disks on the new secondary node (the old primary) to secondary
6706
      - wait until disks are fully synchronized
6707
      - change disks into single-master mode
6708

6709
    """
6710
    instance = self.instance
6711
    target_node = self.target_node
6712
    source_node = self.source_node
6713

    
6714
    self.feedback_fn("* checking disk consistency between source and target")
6715
    for dev in instance.disks:
6716
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6717
        raise errors.OpExecError("Disk %s is degraded or not fully"
6718
                                 " synchronized on target node,"
6719
                                 " aborting migrate." % dev.iv_name)
6720

    
6721
    # First get the migration information from the remote node
6722
    result = self.rpc.call_migration_info(source_node, instance)
6723
    msg = result.fail_msg
6724
    if msg:
6725
      log_err = ("Failed fetching source migration information from %s: %s" %
6726
                 (source_node, msg))
6727
      logging.error(log_err)
6728
      raise errors.OpExecError(log_err)
6729

    
6730
    self.migration_info = migration_info = result.payload
6731

    
6732
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6733
      # Then switch the disks to master/master mode
6734
      self._EnsureSecondary(target_node)
6735
      self._GoStandalone()
6736
      self._GoReconnect(True)
6737
      self._WaitUntilSync()
6738

    
6739
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6740
    result = self.rpc.call_accept_instance(target_node,
6741
                                           instance,
6742
                                           migration_info,
6743
                                           self.nodes_ip[target_node])
6744

    
6745
    msg = result.fail_msg
6746
    if msg:
6747
      logging.error("Instance pre-migration failed, trying to revert"
6748
                    " disk status: %s", msg)
6749
      self.feedback_fn("Pre-migration failed, aborting")
6750
      self._AbortMigration()
6751
      self._RevertDiskStatus()
6752
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6753
                               (instance.name, msg))
6754

    
6755
    self.feedback_fn("* migrating instance to %s" % target_node)
6756
    time.sleep(10)
6757
    result = self.rpc.call_instance_migrate(source_node, instance,
6758
                                            self.nodes_ip[target_node],
6759
                                            self.live)
6760
    msg = result.fail_msg
6761
    if msg:
6762
      logging.error("Instance migration failed, trying to revert"
6763
                    " disk status: %s", msg)
6764
      self.feedback_fn("Migration failed, aborting")
6765
      self._AbortMigration()
6766
      self._RevertDiskStatus()
6767
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6768
                               (instance.name, msg))
6769
    time.sleep(10)
6770

    
6771
    instance.primary_node = target_node
6772
    # distribute new instance config to the other nodes
6773
    self.cfg.Update(instance, self.feedback_fn)
6774

    
6775
    result = self.rpc.call_finalize_migration(target_node,
6776
                                              instance,
6777
                                              migration_info,
6778
                                              True)
6779
    msg = result.fail_msg
6780
    if msg:
6781
      logging.error("Instance migration succeeded, but finalization failed:"
6782
                    " %s", msg)
6783
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6784
                               msg)
6785

    
6786
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6787
      self._EnsureSecondary(source_node)
6788
      self._WaitUntilSync()
6789
      self._GoStandalone()
6790
      self._GoReconnect(False)
6791
      self._WaitUntilSync()
6792

    
6793
    self.feedback_fn("* done")
6794

    
6795
  def _ExecFailover(self):
6796
    """Failover an instance.
6797

6798
    The failover is done by shutting it down on its present node and
6799
    starting it on the secondary.
6800

6801
    """
6802
    instance = self.instance
6803
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6804

    
6805
    source_node = instance.primary_node
6806
    target_node = self.target_node
6807

    
6808
    if instance.admin_up:
6809
      self.feedback_fn("* checking disk consistency between source and target")
6810
      for dev in instance.disks:
6811
        # for drbd, these are drbd over lvm
6812
        if not _CheckDiskConsistency(self, dev, target_node, False):
6813
          if not self.ignore_consistency:
6814
            raise errors.OpExecError("Disk %s is degraded on target node,"
6815
                                     " aborting failover." % dev.iv_name)
6816
    else:
6817
      self.feedback_fn("* not checking disk consistency as instance is not"
6818
                       " running")
6819

    
6820
    self.feedback_fn("* shutting down instance on source node")
6821
    logging.info("Shutting down instance %s on node %s",
6822
                 instance.name, source_node)
6823

    
6824
    result = self.rpc.call_instance_shutdown(source_node, instance,
6825
                                             self.shutdown_timeout)
6826
    msg = result.fail_msg
6827
    if msg:
6828
      if self.ignore_consistency or primary_node.offline:
6829
        self.lu.LogWarning("Could not shutdown instance %s on node %s."
6830
                           " Proceeding anyway. Please make sure node"
6831
                           " %s is down. Error details: %s",
6832
                           instance.name, source_node, source_node, msg)
6833
      else:
6834
        raise errors.OpExecError("Could not shutdown instance %s on"
6835
                                 " node %s: %s" %
6836
                                 (instance.name, source_node, msg))
6837

    
6838
    self.feedback_fn("* deactivating the instance's disks on source node")
6839
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6840
      raise errors.OpExecError("Can't shut down the instance's disks.")
6841

    
6842
    instance.primary_node = target_node
6843
    # distribute new instance config to the other nodes
6844
    self.cfg.Update(instance, self.feedback_fn)
6845

    
6846
    # Only start the instance if it's marked as up
6847
    if instance.admin_up:
6848
      self.feedback_fn("* activating the instance's disks on target node")
6849
      logging.info("Starting instance %s on node %s",
6850
                   instance.name, target_node)
6851

    
6852
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6853
                                           ignore_secondaries=True)
6854
      if not disks_ok:
6855
        _ShutdownInstanceDisks(self, instance)
6856
        raise errors.OpExecError("Can't activate the instance's disks")
6857

    
6858
      self.feedback_fn("* starting the instance on the target node")
6859
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6860
      msg = result.fail_msg
6861
      if msg:
6862
        _ShutdownInstanceDisks(self, instance)
6863
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6864
                                 (instance.name, target_node, msg))
6865

    
6866
  def Exec(self, feedback_fn):
6867
    """Perform the migration.
6868

6869
    """
6870
    self.feedback_fn = feedback_fn
6871
    self.source_node = self.instance.primary_node
6872

    
6873
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6874
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
6875
      self.target_node = self.instance.secondary_nodes[0]
6876
      # Otherwise self.target_node has been populated either
6877
      # directly, or through an iallocator.
6878

    
6879
    self.all_nodes = [self.source_node, self.target_node]
6880
    self.nodes_ip = {
6881
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6882
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6883
      }
6884

    
6885
    if self.failover:
6886
      feedback_fn("Failover instance %s" % self.instance.name)
6887
      self._ExecFailover()
6888
    else:
6889
      feedback_fn("Migrating instance %s" % self.instance.name)
6890

    
6891
      if self.cleanup:
6892
        return self._ExecCleanup()
6893
      else:
6894
        return self._ExecMigration()
6895

    
6896

    
6897
def _CreateBlockDev(lu, node, instance, device, force_create,
6898
                    info, force_open):
6899
  """Create a tree of block devices on a given node.
6900

6901
  If this device type has to be created on secondaries, create it and
6902
  all its children.
6903

6904
  If not, just recurse to children keeping the same 'force' value.
6905

6906
  @param lu: the lu on whose behalf we execute
6907
  @param node: the node on which to create the device
6908
  @type instance: L{objects.Instance}
6909
  @param instance: the instance which owns the device
6910
  @type device: L{objects.Disk}
6911
  @param device: the device to create
6912
  @type force_create: boolean
6913
  @param force_create: whether to force creation of this device; this
6914
      will be change to True whenever we find a device which has
6915
      CreateOnSecondary() attribute
6916
  @param info: the extra 'metadata' we should attach to the device
6917
      (this will be represented as a LVM tag)
6918
  @type force_open: boolean
6919
  @param force_open: this parameter will be passes to the
6920
      L{backend.BlockdevCreate} function where it specifies
6921
      whether we run on primary or not, and it affects both
6922
      the child assembly and the device own Open() execution
6923

6924
  """
6925
  if device.CreateOnSecondary():
6926
    force_create = True
6927

    
6928
  if device.children:
6929
    for child in device.children:
6930
      _CreateBlockDev(lu, node, instance, child, force_create,
6931
                      info, force_open)
6932

    
6933
  if not force_create:
6934
    return
6935

    
6936
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6937

    
6938

    
6939
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6940
  """Create a single block device on a given node.
6941

6942
  This will not recurse over children of the device, so they must be
6943
  created in advance.
6944

6945
  @param lu: the lu on whose behalf we execute
6946
  @param node: the node on which to create the device
6947
  @type instance: L{objects.Instance}
6948
  @param instance: the instance which owns the device
6949
  @type device: L{objects.Disk}
6950
  @param device: the device to create
6951
  @param info: the extra 'metadata' we should attach to the device
6952
      (this will be represented as a LVM tag)
6953
  @type force_open: boolean
6954
  @param force_open: this parameter will be passes to the
6955
      L{backend.BlockdevCreate} function where it specifies
6956
      whether we run on primary or not, and it affects both
6957
      the child assembly and the device own Open() execution
6958

6959
  """
6960
  lu.cfg.SetDiskID(device, node)
6961
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6962
                                       instance.name, force_open, info)
6963
  result.Raise("Can't create block device %s on"
6964
               " node %s for instance %s" % (device, node, instance.name))
6965
  if device.physical_id is None:
6966
    device.physical_id = result.payload
6967

    
6968

    
6969
def _GenerateUniqueNames(lu, exts):
6970
  """Generate a suitable LV name.
6971

6972
  This will generate a logical volume name for the given instance.
6973

6974
  """
6975
  results = []
6976
  for val in exts:
6977
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6978
    results.append("%s%s" % (new_id, val))
6979
  return results
6980

    
6981

    
6982
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6983
                         p_minor, s_minor):
6984
  """Generate a drbd8 device complete with its children.
6985

6986
  """
6987
  port = lu.cfg.AllocatePort()
6988
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6989
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6990
                          logical_id=(vgname, names[0]))
6991
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6992
                          logical_id=(vgname, names[1]))
6993
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6994
                          logical_id=(primary, secondary, port,
6995
                                      p_minor, s_minor,
6996
                                      shared_secret),
6997
                          children=[dev_data, dev_meta],
6998
                          iv_name=iv_name)
6999
  return drbd_dev
7000

    
7001

    
7002
def _GenerateDiskTemplate(lu, template_name,
7003
                          instance_name, primary_node,
7004
                          secondary_nodes, disk_info,
7005
                          file_storage_dir, file_driver,
7006
                          base_index, feedback_fn):
7007
  """Generate the entire disk layout for a given template type.
7008

7009
  """
7010
  #TODO: compute space requirements
7011

    
7012
  vgname = lu.cfg.GetVGName()
7013
  disk_count = len(disk_info)
7014
  disks = []
7015
  if template_name == constants.DT_DISKLESS:
7016
    pass
7017
  elif template_name == constants.DT_PLAIN:
7018
    if len(secondary_nodes) != 0:
7019
      raise errors.ProgrammerError("Wrong template configuration")
7020

    
7021
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7022
                                      for i in range(disk_count)])
7023
    for idx, disk in enumerate(disk_info):
7024
      disk_index = idx + base_index
7025
      vg = disk.get(constants.IDISK_VG, vgname)
7026
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7027
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7028
                              size=disk[constants.IDISK_SIZE],
7029
                              logical_id=(vg, names[idx]),
7030
                              iv_name="disk/%d" % disk_index,
7031
                              mode=disk[constants.IDISK_MODE])
7032
      disks.append(disk_dev)
7033
  elif template_name == constants.DT_DRBD8:
7034
    if len(secondary_nodes) != 1:
7035
      raise errors.ProgrammerError("Wrong template configuration")
7036
    remote_node = secondary_nodes[0]
7037
    minors = lu.cfg.AllocateDRBDMinor(
7038
      [primary_node, remote_node] * len(disk_info), instance_name)
7039

    
7040
    names = []
7041
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7042
                                               for i in range(disk_count)]):
7043
      names.append(lv_prefix + "_data")
7044
      names.append(lv_prefix + "_meta")
7045
    for idx, disk in enumerate(disk_info):
7046
      disk_index = idx + base_index
7047
      vg = disk.get(constants.IDISK_VG, vgname)
7048
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7049
                                      disk[constants.IDISK_SIZE], vg,
7050
                                      names[idx * 2:idx * 2 + 2],
7051
                                      "disk/%d" % disk_index,
7052
                                      minors[idx * 2], minors[idx * 2 + 1])
7053
      disk_dev.mode = disk[constants.IDISK_MODE]
7054
      disks.append(disk_dev)
7055
  elif template_name == constants.DT_FILE:
7056
    if len(secondary_nodes) != 0:
7057
      raise errors.ProgrammerError("Wrong template configuration")
7058

    
7059
    opcodes.RequireFileStorage()
7060

    
7061
    for idx, disk in enumerate(disk_info):
7062
      disk_index = idx + base_index
7063
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7064
                              size=disk[constants.IDISK_SIZE],
7065
                              iv_name="disk/%d" % disk_index,
7066
                              logical_id=(file_driver,
7067
                                          "%s/disk%d" % (file_storage_dir,
7068
                                                         disk_index)),
7069
                              mode=disk[constants.IDISK_MODE])
7070
      disks.append(disk_dev)
7071
  elif template_name == constants.DT_SHARED_FILE:
7072
    if len(secondary_nodes) != 0:
7073
      raise errors.ProgrammerError("Wrong template configuration")
7074

    
7075
    opcodes.RequireSharedFileStorage()
7076

    
7077
    for idx, disk in enumerate(disk_info):
7078
      disk_index = idx + base_index
7079
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7080
                              size=disk[constants.IDISK_SIZE],
7081
                              iv_name="disk/%d" % disk_index,
7082
                              logical_id=(file_driver,
7083
                                          "%s/disk%d" % (file_storage_dir,
7084
                                                         disk_index)),
7085
                              mode=disk[constants.IDISK_MODE])
7086
      disks.append(disk_dev)
7087
  elif template_name == constants.DT_BLOCK:
7088
    if len(secondary_nodes) != 0:
7089
      raise errors.ProgrammerError("Wrong template configuration")
7090

    
7091
    for idx, disk in enumerate(disk_info):
7092
      disk_index = idx + base_index
7093
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7094
                              size=disk[constants.IDISK_SIZE],
7095
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7096
                                          disk[constants.IDISK_ADOPT]),
7097
                              iv_name="disk/%d" % disk_index,
7098
                              mode=disk[constants.IDISK_MODE])
7099
      disks.append(disk_dev)
7100

    
7101
  else:
7102
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7103
  return disks
7104

    
7105

    
7106
def _GetInstanceInfoText(instance):
7107
  """Compute that text that should be added to the disk's metadata.
7108

7109
  """
7110
  return "originstname+%s" % instance.name
7111

    
7112

    
7113
def _CalcEta(time_taken, written, total_size):
7114
  """Calculates the ETA based on size written and total size.
7115

7116
  @param time_taken: The time taken so far
7117
  @param written: amount written so far
7118
  @param total_size: The total size of data to be written
7119
  @return: The remaining time in seconds
7120

7121
  """
7122
  avg_time = time_taken / float(written)
7123
  return (total_size - written) * avg_time
7124

    
7125

    
7126
def _WipeDisks(lu, instance):
7127
  """Wipes instance disks.
7128

7129
  @type lu: L{LogicalUnit}
7130
  @param lu: the logical unit on whose behalf we execute
7131
  @type instance: L{objects.Instance}
7132
  @param instance: the instance whose disks we should create
7133
  @return: the success of the wipe
7134

7135
  """
7136
  node = instance.primary_node
7137

    
7138
  for device in instance.disks:
7139
    lu.cfg.SetDiskID(device, node)
7140

    
7141
  logging.info("Pause sync of instance %s disks", instance.name)
7142
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7143

    
7144
  for idx, success in enumerate(result.payload):
7145
    if not success:
7146
      logging.warn("pause-sync of instance %s for disks %d failed",
7147
                   instance.name, idx)
7148

    
7149
  try:
7150
    for idx, device in enumerate(instance.disks):
7151
      lu.LogInfo("* Wiping disk %d", idx)
7152
      logging.info("Wiping disk %d for instance %s, node %s",
7153
                   idx, instance.name, node)
7154

    
7155
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7156
      # MAX_WIPE_CHUNK at max
7157
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7158
                            constants.MIN_WIPE_CHUNK_PERCENT)
7159

    
7160
      offset = 0
7161
      size = device.size
7162
      last_output = 0
7163
      start_time = time.time()
7164

    
7165
      while offset < size:
7166
        wipe_size = min(wipe_chunk_size, size - offset)
7167
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7168
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7169
                     (idx, offset, wipe_size))
7170
        now = time.time()
7171
        offset += wipe_size
7172
        if now - last_output >= 60:
7173
          eta = _CalcEta(now - start_time, offset, size)
7174
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7175
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7176
          last_output = now
7177
  finally:
7178
    logging.info("Resume sync of instance %s disks", instance.name)
7179

    
7180
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7181

    
7182
    for idx, success in enumerate(result.payload):
7183
      if not success:
7184
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7185
                      " look at the status and troubleshoot the issue.", idx)
7186
        logging.warn("resume-sync of instance %s for disks %d failed",
7187
                     instance.name, idx)
7188

    
7189

    
7190
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7191
  """Create all disks for an instance.
7192

7193
  This abstracts away some work from AddInstance.
7194

7195
  @type lu: L{LogicalUnit}
7196
  @param lu: the logical unit on whose behalf we execute
7197
  @type instance: L{objects.Instance}
7198
  @param instance: the instance whose disks we should create
7199
  @type to_skip: list
7200
  @param to_skip: list of indices to skip
7201
  @type target_node: string
7202
  @param target_node: if passed, overrides the target node for creation
7203
  @rtype: boolean
7204
  @return: the success of the creation
7205

7206
  """
7207
  info = _GetInstanceInfoText(instance)
7208
  if target_node is None:
7209
    pnode = instance.primary_node
7210
    all_nodes = instance.all_nodes
7211
  else:
7212
    pnode = target_node
7213
    all_nodes = [pnode]
7214

    
7215
  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7216
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7217
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7218

    
7219
    result.Raise("Failed to create directory '%s' on"
7220
                 " node %s" % (file_storage_dir, pnode))
7221

    
7222
  # Note: this needs to be kept in sync with adding of disks in
7223
  # LUInstanceSetParams
7224
  for idx, device in enumerate(instance.disks):
7225
    if to_skip and idx in to_skip:
7226
      continue
7227
    logging.info("Creating volume %s for instance %s",
7228
                 device.iv_name, instance.name)
7229
    #HARDCODE
7230
    for node in all_nodes:
7231
      f_create = node == pnode
7232
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7233

    
7234

    
7235
def _RemoveDisks(lu, instance, target_node=None):
7236
  """Remove all disks for an instance.
7237

7238
  This abstracts away some work from `AddInstance()` and
7239
  `RemoveInstance()`. Note that in case some of the devices couldn't
7240
  be removed, the removal will continue with the other ones (compare
7241
  with `_CreateDisks()`).
7242

7243
  @type lu: L{LogicalUnit}
7244
  @param lu: the logical unit on whose behalf we execute
7245
  @type instance: L{objects.Instance}
7246
  @param instance: the instance whose disks we should remove
7247
  @type target_node: string
7248
  @param target_node: used to override the node on which to remove the disks
7249
  @rtype: boolean
7250
  @return: the success of the removal
7251

7252
  """
7253
  logging.info("Removing block devices for instance %s", instance.name)
7254

    
7255
  all_result = True
7256
  for device in instance.disks:
7257
    if target_node:
7258
      edata = [(target_node, device)]
7259
    else:
7260
      edata = device.ComputeNodeTree(instance.primary_node)
7261
    for node, disk in edata:
7262
      lu.cfg.SetDiskID(disk, node)
7263
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7264
      if msg:
7265
        lu.LogWarning("Could not remove block device %s on node %s,"
7266
                      " continuing anyway: %s", device.iv_name, node, msg)
7267
        all_result = False
7268

    
7269
  if instance.disk_template == constants.DT_FILE:
7270
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7271
    if target_node:
7272
      tgt = target_node
7273
    else:
7274
      tgt = instance.primary_node
7275
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7276
    if result.fail_msg:
7277
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7278
                    file_storage_dir, instance.primary_node, result.fail_msg)
7279
      all_result = False
7280

    
7281
  return all_result
7282

    
7283

    
7284
def _ComputeDiskSizePerVG(disk_template, disks):
7285
  """Compute disk size requirements in the volume group
7286

7287
  """
7288
  def _compute(disks, payload):
7289
    """Universal algorithm.
7290

7291
    """
7292
    vgs = {}
7293
    for disk in disks:
7294
      vgs[disk[constants.IDISK_VG]] = \
7295
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7296

    
7297
    return vgs
7298

    
7299
  # Required free disk space as a function of disk and swap space
7300
  req_size_dict = {
7301
    constants.DT_DISKLESS: {},
7302
    constants.DT_PLAIN: _compute(disks, 0),
7303
    # 128 MB are added for drbd metadata for each disk
7304
    constants.DT_DRBD8: _compute(disks, 128),
7305
    constants.DT_FILE: {},
7306
    constants.DT_SHARED_FILE: {},
7307
  }
7308

    
7309
  if disk_template not in req_size_dict:
7310
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7311
                                 " is unknown" %  disk_template)
7312

    
7313
  return req_size_dict[disk_template]
7314

    
7315

    
7316
def _ComputeDiskSize(disk_template, disks):
7317
  """Compute disk size requirements in the volume group
7318

7319
  """
7320
  # Required free disk space as a function of disk and swap space
7321
  req_size_dict = {
7322
    constants.DT_DISKLESS: None,
7323
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7324
    # 128 MB are added for drbd metadata for each disk
7325
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7326
    constants.DT_FILE: None,
7327
    constants.DT_SHARED_FILE: 0,
7328
    constants.DT_BLOCK: 0,
7329
  }
7330

    
7331
  if disk_template not in req_size_dict:
7332
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7333
                                 " is unknown" %  disk_template)
7334

    
7335
  return req_size_dict[disk_template]
7336

    
7337

    
7338
def _FilterVmNodes(lu, nodenames):
7339
  """Filters out non-vm_capable nodes from a list.
7340

7341
  @type lu: L{LogicalUnit}
7342
  @param lu: the logical unit for which we check
7343
  @type nodenames: list
7344
  @param nodenames: the list of nodes on which we should check
7345
  @rtype: list
7346
  @return: the list of vm-capable nodes
7347

7348
  """
7349
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7350
  return [name for name in nodenames if name not in vm_nodes]
7351

    
7352

    
7353
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7354
  """Hypervisor parameter validation.
7355

7356
  This function abstract the hypervisor parameter validation to be
7357
  used in both instance create and instance modify.
7358

7359
  @type lu: L{LogicalUnit}
7360
  @param lu: the logical unit for which we check
7361
  @type nodenames: list
7362
  @param nodenames: the list of nodes on which we should check
7363
  @type hvname: string
7364
  @param hvname: the name of the hypervisor we should use
7365
  @type hvparams: dict
7366
  @param hvparams: the parameters which we need to check
7367
  @raise errors.OpPrereqError: if the parameters are not valid
7368

7369
  """
7370
  nodenames = _FilterVmNodes(lu, nodenames)
7371
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7372
                                                  hvname,
7373
                                                  hvparams)
7374
  for node in nodenames:
7375
    info = hvinfo[node]
7376
    if info.offline:
7377
      continue
7378
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7379

    
7380

    
7381
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7382
  """OS parameters validation.
7383

7384
  @type lu: L{LogicalUnit}
7385
  @param lu: the logical unit for which we check
7386
  @type required: boolean
7387
  @param required: whether the validation should fail if the OS is not
7388
      found
7389
  @type nodenames: list
7390
  @param nodenames: the list of nodes on which we should check
7391
  @type osname: string
7392
  @param osname: the name of the hypervisor we should use
7393
  @type osparams: dict
7394
  @param osparams: the parameters which we need to check
7395
  @raise errors.OpPrereqError: if the parameters are not valid
7396

7397
  """
7398
  nodenames = _FilterVmNodes(lu, nodenames)
7399
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7400
                                   [constants.OS_VALIDATE_PARAMETERS],
7401
                                   osparams)
7402
  for node, nres in result.items():
7403
    # we don't check for offline cases since this should be run only
7404
    # against the master node and/or an instance's nodes
7405
    nres.Raise("OS Parameters validation failed on node %s" % node)
7406
    if not nres.payload:
7407
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7408
                 osname, node)
7409

    
7410

    
7411
class LUInstanceCreate(LogicalUnit):
7412
  """Create an instance.
7413

7414
  """
7415
  HPATH = "instance-add"
7416
  HTYPE = constants.HTYPE_INSTANCE
7417
  REQ_BGL = False
7418

    
7419
  def CheckArguments(self):
7420
    """Check arguments.
7421

7422
    """
7423
    # do not require name_check to ease forward/backward compatibility
7424
    # for tools
7425
    if self.op.no_install and self.op.start:
7426
      self.LogInfo("No-installation mode selected, disabling startup")
7427
      self.op.start = False
7428
    # validate/normalize the instance name
7429
    self.op.instance_name = \
7430
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7431

    
7432
    if self.op.ip_check and not self.op.name_check:
7433
      # TODO: make the ip check more flexible and not depend on the name check
7434
      raise errors.OpPrereqError("Cannot do ip check without a name check",
7435
                                 errors.ECODE_INVAL)
7436

    
7437
    # check nics' parameter names
7438
    for nic in self.op.nics:
7439
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7440

    
7441
    # check disks. parameter names and consistent adopt/no-adopt strategy
7442
    has_adopt = has_no_adopt = False
7443
    for disk in self.op.disks:
7444
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7445
      if constants.IDISK_ADOPT in disk:
7446
        has_adopt = True
7447
      else:
7448
        has_no_adopt = True
7449
    if has_adopt and has_no_adopt:
7450
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7451
                                 errors.ECODE_INVAL)
7452
    if has_adopt:
7453
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7454
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7455
                                   " '%s' disk template" %
7456
                                   self.op.disk_template,
7457
                                   errors.ECODE_INVAL)
7458
      if self.op.iallocator is not None:
7459
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7460
                                   " iallocator script", errors.ECODE_INVAL)
7461
      if self.op.mode == constants.INSTANCE_IMPORT:
7462
        raise errors.OpPrereqError("Disk adoption not allowed for"
7463
                                   " instance import", errors.ECODE_INVAL)
7464
    else:
7465
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7466
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7467
                                   " but no 'adopt' parameter given" %
7468
                                   self.op.disk_template,
7469
                                   errors.ECODE_INVAL)
7470

    
7471
    self.adopt_disks = has_adopt
7472

    
7473
    # instance name verification
7474
    if self.op.name_check:
7475
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7476
      self.op.instance_name = self.hostname1.name
7477
      # used in CheckPrereq for ip ping check
7478
      self.check_ip = self.hostname1.ip
7479
    else:
7480
      self.check_ip = None
7481

    
7482
    # file storage checks
7483
    if (self.op.file_driver and
7484
        not self.op.file_driver in constants.FILE_DRIVER):
7485
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7486
                                 self.op.file_driver, errors.ECODE_INVAL)
7487

    
7488
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7489
      raise errors.OpPrereqError("File storage directory path not absolute",
7490
                                 errors.ECODE_INVAL)
7491

    
7492
    ### Node/iallocator related checks
7493
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7494

    
7495
    if self.op.pnode is not None:
7496
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7497
        if self.op.snode is None:
7498
          raise errors.OpPrereqError("The networked disk templates need"
7499
                                     " a mirror node", errors.ECODE_INVAL)
7500
      elif self.op.snode:
7501
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7502
                        " template")
7503
        self.op.snode = None
7504

    
7505
    self._cds = _GetClusterDomainSecret()
7506

    
7507
    if self.op.mode == constants.INSTANCE_IMPORT:
7508
      # On import force_variant must be True, because if we forced it at
7509
      # initial install, our only chance when importing it back is that it
7510
      # works again!
7511
      self.op.force_variant = True
7512

    
7513
      if self.op.no_install:
7514
        self.LogInfo("No-installation mode has no effect during import")
7515

    
7516
    elif self.op.mode == constants.INSTANCE_CREATE:
7517
      if self.op.os_type is None:
7518
        raise errors.OpPrereqError("No guest OS specified",
7519
                                   errors.ECODE_INVAL)
7520
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7521
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7522
                                   " installation" % self.op.os_type,
7523
                                   errors.ECODE_STATE)
7524
      if self.op.disk_template is None:
7525
        raise errors.OpPrereqError("No disk template specified",
7526
                                   errors.ECODE_INVAL)
7527

    
7528
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7529
      # Check handshake to ensure both clusters have the same domain secret
7530
      src_handshake = self.op.source_handshake
7531
      if not src_handshake:
7532
        raise errors.OpPrereqError("Missing source handshake",
7533
                                   errors.ECODE_INVAL)
7534

    
7535
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7536
                                                           src_handshake)
7537
      if errmsg:
7538
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7539
                                   errors.ECODE_INVAL)
7540

    
7541
      # Load and check source CA
7542
      self.source_x509_ca_pem = self.op.source_x509_ca
7543
      if not self.source_x509_ca_pem:
7544
        raise errors.OpPrereqError("Missing source X509 CA",
7545
                                   errors.ECODE_INVAL)
7546

    
7547
      try:
7548
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7549
                                                    self._cds)
7550
      except OpenSSL.crypto.Error, err:
7551
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7552
                                   (err, ), errors.ECODE_INVAL)
7553

    
7554
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7555
      if errcode is not None:
7556
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7557
                                   errors.ECODE_INVAL)
7558

    
7559
      self.source_x509_ca = cert
7560

    
7561
      src_instance_name = self.op.source_instance_name
7562
      if not src_instance_name:
7563
        raise errors.OpPrereqError("Missing source instance name",
7564
                                   errors.ECODE_INVAL)
7565

    
7566
      self.source_instance_name = \
7567
          netutils.GetHostname(name=src_instance_name).name
7568

    
7569
    else:
7570
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7571
                                 self.op.mode, errors.ECODE_INVAL)
7572

    
7573
  def ExpandNames(self):
7574
    """ExpandNames for CreateInstance.
7575

7576
    Figure out the right locks for instance creation.
7577

7578
    """
7579
    self.needed_locks = {}
7580

    
7581
    instance_name = self.op.instance_name
7582
    # this is just a preventive check, but someone might still add this
7583
    # instance in the meantime, and creation will fail at lock-add time
7584
    if instance_name in self.cfg.GetInstanceList():
7585
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7586
                                 instance_name, errors.ECODE_EXISTS)
7587

    
7588
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7589

    
7590
    if self.op.iallocator:
7591
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7592
    else:
7593
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7594
      nodelist = [self.op.pnode]
7595
      if self.op.snode is not None:
7596
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7597
        nodelist.append(self.op.snode)
7598
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7599

    
7600
    # in case of import lock the source node too
7601
    if self.op.mode == constants.INSTANCE_IMPORT:
7602
      src_node = self.op.src_node
7603
      src_path = self.op.src_path
7604

    
7605
      if src_path is None:
7606
        self.op.src_path = src_path = self.op.instance_name
7607

    
7608
      if src_node is None:
7609
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7610
        self.op.src_node = None
7611
        if os.path.isabs(src_path):
7612
          raise errors.OpPrereqError("Importing an instance from an absolute"
7613
                                     " path requires a source node option.",
7614
                                     errors.ECODE_INVAL)
7615
      else:
7616
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7617
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7618
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7619
        if not os.path.isabs(src_path):
7620
          self.op.src_path = src_path = \
7621
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7622

    
7623
  def _RunAllocator(self):
7624
    """Run the allocator based on input opcode.
7625

7626
    """
7627
    nics = [n.ToDict() for n in self.nics]
7628
    ial = IAllocator(self.cfg, self.rpc,
7629
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7630
                     name=self.op.instance_name,
7631
                     disk_template=self.op.disk_template,
7632
                     tags=[],
7633
                     os=self.op.os_type,
7634
                     vcpus=self.be_full[constants.BE_VCPUS],
7635
                     mem_size=self.be_full[constants.BE_MEMORY],
7636
                     disks=self.disks,
7637
                     nics=nics,
7638
                     hypervisor=self.op.hypervisor,
7639
                     )
7640

    
7641
    ial.Run(self.op.iallocator)
7642

    
7643
    if not ial.success:
7644
      raise errors.OpPrereqError("Can't compute nodes using"
7645
                                 " iallocator '%s': %s" %
7646
                                 (self.op.iallocator, ial.info),
7647
                                 errors.ECODE_NORES)
7648
    if len(ial.result) != ial.required_nodes:
7649
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7650
                                 " of nodes (%s), required %s" %
7651
                                 (self.op.iallocator, len(ial.result),
7652
                                  ial.required_nodes), errors.ECODE_FAULT)
7653
    self.op.pnode = ial.result[0]
7654
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7655
                 self.op.instance_name, self.op.iallocator,
7656
                 utils.CommaJoin(ial.result))
7657
    if ial.required_nodes == 2:
7658
      self.op.snode = ial.result[1]
7659

    
7660
  def BuildHooksEnv(self):
7661
    """Build hooks env.
7662

7663
    This runs on master, primary and secondary nodes of the instance.
7664

7665
    """
7666
    env = {
7667
      "ADD_MODE": self.op.mode,
7668
      }
7669
    if self.op.mode == constants.INSTANCE_IMPORT:
7670
      env["SRC_NODE"] = self.op.src_node
7671
      env["SRC_PATH"] = self.op.src_path
7672
      env["SRC_IMAGES"] = self.src_images
7673

    
7674
    env.update(_BuildInstanceHookEnv(
7675
      name=self.op.instance_name,
7676
      primary_node=self.op.pnode,
7677
      secondary_nodes=self.secondaries,
7678
      status=self.op.start,
7679
      os_type=self.op.os_type,
7680
      memory=self.be_full[constants.BE_MEMORY],
7681
      vcpus=self.be_full[constants.BE_VCPUS],
7682
      nics=_NICListToTuple(self, self.nics),
7683
      disk_template=self.op.disk_template,
7684
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7685
             for d in self.disks],
7686
      bep=self.be_full,
7687
      hvp=self.hv_full,
7688
      hypervisor_name=self.op.hypervisor,
7689
    ))
7690

    
7691
    return env
7692

    
7693
  def BuildHooksNodes(self):
7694
    """Build hooks nodes.
7695

7696
    """
7697
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7698
    return nl, nl
7699

    
7700
  def _ReadExportInfo(self):
7701
    """Reads the export information from disk.
7702

7703
    It will override the opcode source node and path with the actual
7704
    information, if these two were not specified before.
7705

7706
    @return: the export information
7707

7708
    """
7709
    assert self.op.mode == constants.INSTANCE_IMPORT
7710

    
7711
    src_node = self.op.src_node
7712
    src_path = self.op.src_path
7713

    
7714
    if src_node is None:
7715
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7716
      exp_list = self.rpc.call_export_list(locked_nodes)
7717
      found = False
7718
      for node in exp_list:
7719
        if exp_list[node].fail_msg:
7720
          continue
7721
        if src_path in exp_list[node].payload:
7722
          found = True
7723
          self.op.src_node = src_node = node
7724
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7725
                                                       src_path)
7726
          break
7727
      if not found:
7728
        raise errors.OpPrereqError("No export found for relative path %s" %
7729
                                    src_path, errors.ECODE_INVAL)
7730

    
7731
    _CheckNodeOnline(self, src_node)
7732
    result = self.rpc.call_export_info(src_node, src_path)
7733
    result.Raise("No export or invalid export found in dir %s" % src_path)
7734

    
7735
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7736
    if not export_info.has_section(constants.INISECT_EXP):
7737
      raise errors.ProgrammerError("Corrupted export config",
7738
                                   errors.ECODE_ENVIRON)
7739

    
7740
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7741
    if (int(ei_version) != constants.EXPORT_VERSION):
7742
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7743
                                 (ei_version, constants.EXPORT_VERSION),
7744
                                 errors.ECODE_ENVIRON)
7745
    return export_info
7746

    
7747
  def _ReadExportParams(self, einfo):
7748
    """Use export parameters as defaults.
7749

7750
    In case the opcode doesn't specify (as in override) some instance
7751
    parameters, then try to use them from the export information, if
7752
    that declares them.
7753

7754
    """
7755
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7756

    
7757
    if self.op.disk_template is None:
7758
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7759
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7760
                                          "disk_template")
7761
      else:
7762
        raise errors.OpPrereqError("No disk template specified and the export"
7763
                                   " is missing the disk_template information",
7764
                                   errors.ECODE_INVAL)
7765

    
7766
    if not self.op.disks:
7767
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7768
        disks = []
7769
        # TODO: import the disk iv_name too
7770
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7771
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7772
          disks.append({constants.IDISK_SIZE: disk_sz})
7773
        self.op.disks = disks
7774
      else:
7775
        raise errors.OpPrereqError("No disk info specified and the export"
7776
                                   " is missing the disk information",
7777
                                   errors.ECODE_INVAL)
7778

    
7779
    if (not self.op.nics and
7780
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7781
      nics = []
7782
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7783
        ndict = {}
7784
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7785
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7786
          ndict[name] = v
7787
        nics.append(ndict)
7788
      self.op.nics = nics
7789

    
7790
    if (self.op.hypervisor is None and
7791
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7792
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7793
    if einfo.has_section(constants.INISECT_HYP):
7794
      # use the export parameters but do not override the ones
7795
      # specified by the user
7796
      for name, value in einfo.items(constants.INISECT_HYP):
7797
        if name not in self.op.hvparams:
7798
          self.op.hvparams[name] = value
7799

    
7800
    if einfo.has_section(constants.INISECT_BEP):
7801
      # use the parameters, without overriding
7802
      for name, value in einfo.items(constants.INISECT_BEP):
7803
        if name not in self.op.beparams:
7804
          self.op.beparams[name] = value
7805
    else:
7806
      # try to read the parameters old style, from the main section
7807
      for name in constants.BES_PARAMETERS:
7808
        if (name not in self.op.beparams and
7809
            einfo.has_option(constants.INISECT_INS, name)):
7810
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7811

    
7812
    if einfo.has_section(constants.INISECT_OSP):
7813
      # use the parameters, without overriding
7814
      for name, value in einfo.items(constants.INISECT_OSP):
7815
        if name not in self.op.osparams:
7816
          self.op.osparams[name] = value
7817

    
7818
  def _RevertToDefaults(self, cluster):
7819
    """Revert the instance parameters to the default values.
7820

7821
    """
7822
    # hvparams
7823
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7824
    for name in self.op.hvparams.keys():
7825
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7826
        del self.op.hvparams[name]
7827
    # beparams
7828
    be_defs = cluster.SimpleFillBE({})
7829
    for name in self.op.beparams.keys():
7830
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7831
        del self.op.beparams[name]
7832
    # nic params
7833
    nic_defs = cluster.SimpleFillNIC({})
7834
    for nic in self.op.nics:
7835
      for name in constants.NICS_PARAMETERS:
7836
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7837
          del nic[name]
7838
    # osparams
7839
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7840
    for name in self.op.osparams.keys():
7841
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7842
        del self.op.osparams[name]
7843

    
7844
  def CheckPrereq(self):
7845
    """Check prerequisites.
7846

7847
    """
7848
    if self.op.mode == constants.INSTANCE_IMPORT:
7849
      export_info = self._ReadExportInfo()
7850
      self._ReadExportParams(export_info)
7851

    
7852
    if (not self.cfg.GetVGName() and
7853
        self.op.disk_template not in constants.DTS_NOT_LVM):
7854
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7855
                                 " instances", errors.ECODE_STATE)
7856

    
7857
    if self.op.hypervisor is None:
7858
      self.op.hypervisor = self.cfg.GetHypervisorType()
7859

    
7860
    cluster = self.cfg.GetClusterInfo()
7861
    enabled_hvs = cluster.enabled_hypervisors
7862
    if self.op.hypervisor not in enabled_hvs:
7863
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7864
                                 " cluster (%s)" % (self.op.hypervisor,
7865
                                  ",".join(enabled_hvs)),
7866
                                 errors.ECODE_STATE)
7867

    
7868
    # check hypervisor parameter syntax (locally)
7869
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7870
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7871
                                      self.op.hvparams)
7872
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7873
    hv_type.CheckParameterSyntax(filled_hvp)
7874
    self.hv_full = filled_hvp
7875
    # check that we don't specify global parameters on an instance
7876
    _CheckGlobalHvParams(self.op.hvparams)
7877

    
7878
    # fill and remember the beparams dict
7879
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7880
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7881

    
7882
    # build os parameters
7883
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7884

    
7885
    # now that hvp/bep are in final format, let's reset to defaults,
7886
    # if told to do so
7887
    if self.op.identify_defaults:
7888
      self._RevertToDefaults(cluster)
7889

    
7890
    # NIC buildup
7891
    self.nics = []
7892
    for idx, nic in enumerate(self.op.nics):
7893
      nic_mode_req = nic.get(constants.INIC_MODE, None)
7894
      nic_mode = nic_mode_req
7895
      if nic_mode is None:
7896
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7897

    
7898
      # in routed mode, for the first nic, the default ip is 'auto'
7899
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7900
        default_ip_mode = constants.VALUE_AUTO
7901
      else:
7902
        default_ip_mode = constants.VALUE_NONE
7903

    
7904
      # ip validity checks
7905
      ip = nic.get(constants.INIC_IP, default_ip_mode)
7906
      if ip is None or ip.lower() == constants.VALUE_NONE:
7907
        nic_ip = None
7908
      elif ip.lower() == constants.VALUE_AUTO:
7909
        if not self.op.name_check:
7910
          raise errors.OpPrereqError("IP address set to auto but name checks"
7911
                                     " have been skipped",
7912
                                     errors.ECODE_INVAL)
7913
        nic_ip = self.hostname1.ip
7914
      else:
7915
        if not netutils.IPAddress.IsValid(ip):
7916
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7917
                                     errors.ECODE_INVAL)
7918
        nic_ip = ip
7919

    
7920
      # TODO: check the ip address for uniqueness
7921
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7922
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7923
                                   errors.ECODE_INVAL)
7924

    
7925
      # MAC address verification
7926
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7927
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7928
        mac = utils.NormalizeAndValidateMac(mac)
7929

    
7930
        try:
7931
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7932
        except errors.ReservationError:
7933
          raise errors.OpPrereqError("MAC address %s already in use"
7934
                                     " in cluster" % mac,
7935
                                     errors.ECODE_NOTUNIQUE)
7936

    
7937
      #  Build nic parameters
7938
      link = nic.get(constants.INIC_LINK, None)
7939
      nicparams = {}
7940
      if nic_mode_req:
7941
        nicparams[constants.NIC_MODE] = nic_mode_req
7942
      if link:
7943
        nicparams[constants.NIC_LINK] = link
7944

    
7945
      check_params = cluster.SimpleFillNIC(nicparams)
7946
      objects.NIC.CheckParameterSyntax(check_params)
7947
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7948

    
7949
    # disk checks/pre-build
7950
    default_vg = self.cfg.GetVGName()
7951
    self.disks = []
7952
    for disk in self.op.disks:
7953
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7954
      if mode not in constants.DISK_ACCESS_SET:
7955
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7956
                                   mode, errors.ECODE_INVAL)
7957
      size = disk.get(constants.IDISK_SIZE, None)
7958
      if size is None:
7959
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7960
      try:
7961
        size = int(size)
7962
      except (TypeError, ValueError):
7963
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7964
                                   errors.ECODE_INVAL)
7965
      new_disk = {
7966
        constants.IDISK_SIZE: size,
7967
        constants.IDISK_MODE: mode,
7968
        constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
7969
        }
7970
      if constants.IDISK_ADOPT in disk:
7971
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7972
      self.disks.append(new_disk)
7973

    
7974
    if self.op.mode == constants.INSTANCE_IMPORT:
7975

    
7976
      # Check that the new instance doesn't have less disks than the export
7977
      instance_disks = len(self.disks)
7978
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7979
      if instance_disks < export_disks:
7980
        raise errors.OpPrereqError("Not enough disks to import."
7981
                                   " (instance: %d, export: %d)" %
7982
                                   (instance_disks, export_disks),
7983
                                   errors.ECODE_INVAL)
7984

    
7985
      disk_images = []
7986
      for idx in range(export_disks):
7987
        option = 'disk%d_dump' % idx
7988
        if export_info.has_option(constants.INISECT_INS, option):
7989
          # FIXME: are the old os-es, disk sizes, etc. useful?
7990
          export_name = export_info.get(constants.INISECT_INS, option)
7991
          image = utils.PathJoin(self.op.src_path, export_name)
7992
          disk_images.append(image)
7993
        else:
7994
          disk_images.append(False)
7995

    
7996
      self.src_images = disk_images
7997

    
7998
      old_name = export_info.get(constants.INISECT_INS, 'name')
7999
      try:
8000
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8001
      except (TypeError, ValueError), err:
8002
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8003
                                   " an integer: %s" % str(err),
8004
                                   errors.ECODE_STATE)
8005
      if self.op.instance_name == old_name:
8006
        for idx, nic in enumerate(self.nics):
8007
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8008
            nic_mac_ini = 'nic%d_mac' % idx
8009
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8010

    
8011
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8012

    
8013
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8014
    if self.op.ip_check:
8015
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8016
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8017
                                   (self.check_ip, self.op.instance_name),
8018
                                   errors.ECODE_NOTUNIQUE)
8019

    
8020
    #### mac address generation
8021
    # By generating here the mac address both the allocator and the hooks get
8022
    # the real final mac address rather than the 'auto' or 'generate' value.
8023
    # There is a race condition between the generation and the instance object
8024
    # creation, which means that we know the mac is valid now, but we're not
8025
    # sure it will be when we actually add the instance. If things go bad
8026
    # adding the instance will abort because of a duplicate mac, and the
8027
    # creation job will fail.
8028
    for nic in self.nics:
8029
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8030
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8031

    
8032
    #### allocator run
8033

    
8034
    if self.op.iallocator is not None:
8035
      self._RunAllocator()
8036

    
8037
    #### node related checks
8038

    
8039
    # check primary node
8040
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8041
    assert self.pnode is not None, \
8042
      "Cannot retrieve locked node %s" % self.op.pnode
8043
    if pnode.offline:
8044
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8045
                                 pnode.name, errors.ECODE_STATE)
8046
    if pnode.drained:
8047
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8048
                                 pnode.name, errors.ECODE_STATE)
8049
    if not pnode.vm_capable:
8050
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8051
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8052

    
8053
    self.secondaries = []
8054

    
8055
    # mirror node verification
8056
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8057
      if self.op.snode == pnode.name:
8058
        raise errors.OpPrereqError("The secondary node cannot be the"
8059
                                   " primary node.", errors.ECODE_INVAL)
8060
      _CheckNodeOnline(self, self.op.snode)
8061
      _CheckNodeNotDrained(self, self.op.snode)
8062
      _CheckNodeVmCapable(self, self.op.snode)
8063
      self.secondaries.append(self.op.snode)
8064

    
8065
    nodenames = [pnode.name] + self.secondaries
8066

    
8067
    if not self.adopt_disks:
8068
      # Check lv size requirements, if not adopting
8069
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8070
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8071

    
8072
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8073
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8074
                                disk[constants.IDISK_ADOPT])
8075
                     for disk in self.disks])
8076
      if len(all_lvs) != len(self.disks):
8077
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8078
                                   errors.ECODE_INVAL)
8079
      for lv_name in all_lvs:
8080
        try:
8081
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8082
          # to ReserveLV uses the same syntax
8083
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8084
        except errors.ReservationError:
8085
          raise errors.OpPrereqError("LV named %s used by another instance" %
8086
                                     lv_name, errors.ECODE_NOTUNIQUE)
8087

    
8088
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8089
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8090

    
8091
      node_lvs = self.rpc.call_lv_list([pnode.name],
8092
                                       vg_names.payload.keys())[pnode.name]
8093
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8094
      node_lvs = node_lvs.payload
8095

    
8096
      delta = all_lvs.difference(node_lvs.keys())
8097
      if delta:
8098
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8099
                                   utils.CommaJoin(delta),
8100
                                   errors.ECODE_INVAL)
8101
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8102
      if online_lvs:
8103
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8104
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8105
                                   errors.ECODE_STATE)
8106
      # update the size of disk based on what is found
8107
      for dsk in self.disks:
8108
        dsk[constants.IDISK_SIZE] = \
8109
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8110
                                        dsk[constants.IDISK_ADOPT])][0]))
8111

    
8112
    elif self.op.disk_template == constants.DT_BLOCK:
8113
      # Normalize and de-duplicate device paths
8114
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8115
                       for disk in self.disks])
8116
      if len(all_disks) != len(self.disks):
8117
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8118
                                   errors.ECODE_INVAL)
8119
      baddisks = [d for d in all_disks
8120
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8121
      if baddisks:
8122
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8123
                                   " cannot be adopted" %
8124
                                   (", ".join(baddisks),
8125
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8126
                                   errors.ECODE_INVAL)
8127

    
8128
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8129
                                            list(all_disks))[pnode.name]
8130
      node_disks.Raise("Cannot get block device information from node %s" %
8131
                       pnode.name)
8132
      node_disks = node_disks.payload
8133
      delta = all_disks.difference(node_disks.keys())
8134
      if delta:
8135
        raise errors.OpPrereqError("Missing block device(s): %s" %
8136
                                   utils.CommaJoin(delta),
8137
                                   errors.ECODE_INVAL)
8138
      for dsk in self.disks:
8139
        dsk[constants.IDISK_SIZE] = \
8140
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8141

    
8142
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8143

    
8144
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8145
    # check OS parameters (remotely)
8146
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8147

    
8148
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8149

    
8150
    # memory check on primary node
8151
    if self.op.start:
8152
      _CheckNodeFreeMemory(self, self.pnode.name,
8153
                           "creating instance %s" % self.op.instance_name,
8154
                           self.be_full[constants.BE_MEMORY],
8155
                           self.op.hypervisor)
8156

    
8157
    self.dry_run_result = list(nodenames)
8158

    
8159
  def Exec(self, feedback_fn):
8160
    """Create and add the instance to the cluster.
8161

8162
    """
8163
    instance = self.op.instance_name
8164
    pnode_name = self.pnode.name
8165

    
8166
    ht_kind = self.op.hypervisor
8167
    if ht_kind in constants.HTS_REQ_PORT:
8168
      network_port = self.cfg.AllocatePort()
8169
    else:
8170
      network_port = None
8171

    
8172
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8173
      # this is needed because os.path.join does not accept None arguments
8174
      if self.op.file_storage_dir is None:
8175
        string_file_storage_dir = ""
8176
      else:
8177
        string_file_storage_dir = self.op.file_storage_dir
8178

    
8179
      # build the full file storage dir path
8180
      if self.op.disk_template == constants.DT_SHARED_FILE:
8181
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8182
      else:
8183
        get_fsd_fn = self.cfg.GetFileStorageDir
8184

    
8185
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
8186
                                        string_file_storage_dir, instance)
8187
    else:
8188
      file_storage_dir = ""
8189

    
8190
    disks = _GenerateDiskTemplate(self,
8191
                                  self.op.disk_template,
8192
                                  instance, pnode_name,
8193
                                  self.secondaries,
8194
                                  self.disks,
8195
                                  file_storage_dir,
8196
                                  self.op.file_driver,
8197
                                  0,
8198
                                  feedback_fn)
8199

    
8200
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8201
                            primary_node=pnode_name,
8202
                            nics=self.nics, disks=disks,
8203
                            disk_template=self.op.disk_template,
8204
                            admin_up=False,
8205
                            network_port=network_port,
8206
                            beparams=self.op.beparams,
8207
                            hvparams=self.op.hvparams,
8208
                            hypervisor=self.op.hypervisor,
8209
                            osparams=self.op.osparams,
8210
                            )
8211

    
8212
    if self.adopt_disks:
8213
      if self.op.disk_template == constants.DT_PLAIN:
8214
        # rename LVs to the newly-generated names; we need to construct
8215
        # 'fake' LV disks with the old data, plus the new unique_id
8216
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8217
        rename_to = []
8218
        for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8219
          rename_to.append(t_dsk.logical_id)
8220
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8221
          self.cfg.SetDiskID(t_dsk, pnode_name)
8222
        result = self.rpc.call_blockdev_rename(pnode_name,
8223
                                               zip(tmp_disks, rename_to))
8224
        result.Raise("Failed to rename adoped LVs")
8225
    else:
8226
      feedback_fn("* creating instance disks...")
8227
      try:
8228
        _CreateDisks(self, iobj)
8229
      except errors.OpExecError:
8230
        self.LogWarning("Device creation failed, reverting...")
8231
        try:
8232
          _RemoveDisks(self, iobj)
8233
        finally:
8234
          self.cfg.ReleaseDRBDMinors(instance)
8235
          raise
8236

    
8237
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8238
        feedback_fn("* wiping instance disks...")
8239
        try:
8240
          _WipeDisks(self, iobj)
8241
        except errors.OpExecError:
8242
          self.LogWarning("Device wiping failed, reverting...")
8243
          try:
8244
            _RemoveDisks(self, iobj)
8245
          finally:
8246
            self.cfg.ReleaseDRBDMinors(instance)
8247
            raise
8248

    
8249
    feedback_fn("adding instance %s to cluster config" % instance)
8250

    
8251
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8252

    
8253
    # Declare that we don't want to remove the instance lock anymore, as we've
8254
    # added the instance to the config
8255
    del self.remove_locks[locking.LEVEL_INSTANCE]
8256
    # Unlock all the nodes
8257
    if self.op.mode == constants.INSTANCE_IMPORT:
8258
      nodes_keep = [self.op.src_node]
8259
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8260
                       if node != self.op.src_node]
8261
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8262
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8263
    else:
8264
      self.context.glm.release(locking.LEVEL_NODE)
8265
      del self.acquired_locks[locking.LEVEL_NODE]
8266

    
8267
    if self.op.wait_for_sync:
8268
      disk_abort = not _WaitForSync(self, iobj)
8269
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8270
      # make sure the disks are not degraded (still sync-ing is ok)
8271
      time.sleep(15)
8272
      feedback_fn("* checking mirrors status")
8273
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8274
    else:
8275
      disk_abort = False
8276

    
8277
    if disk_abort:
8278
      _RemoveDisks(self, iobj)
8279
      self.cfg.RemoveInstance(iobj.name)
8280
      # Make sure the instance lock gets removed
8281
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8282
      raise errors.OpExecError("There are some degraded disks for"
8283
                               " this instance")
8284

    
8285
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8286
      if self.op.mode == constants.INSTANCE_CREATE:
8287
        if not self.op.no_install:
8288
          feedback_fn("* running the instance OS create scripts...")
8289
          # FIXME: pass debug option from opcode to backend
8290
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8291
                                                 self.op.debug_level)
8292
          result.Raise("Could not add os for instance %s"
8293
                       " on node %s" % (instance, pnode_name))
8294

    
8295
      elif self.op.mode == constants.INSTANCE_IMPORT:
8296
        feedback_fn("* running the instance OS import scripts...")
8297

    
8298
        transfers = []
8299

    
8300
        for idx, image in enumerate(self.src_images):
8301
          if not image:
8302
            continue
8303

    
8304
          # FIXME: pass debug option from opcode to backend
8305
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8306
                                             constants.IEIO_FILE, (image, ),
8307
                                             constants.IEIO_SCRIPT,
8308
                                             (iobj.disks[idx], idx),
8309
                                             None)
8310
          transfers.append(dt)
8311

    
8312
        import_result = \
8313
          masterd.instance.TransferInstanceData(self, feedback_fn,
8314
                                                self.op.src_node, pnode_name,
8315
                                                self.pnode.secondary_ip,
8316
                                                iobj, transfers)
8317
        if not compat.all(import_result):
8318
          self.LogWarning("Some disks for instance %s on node %s were not"
8319
                          " imported successfully" % (instance, pnode_name))
8320

    
8321
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8322
        feedback_fn("* preparing remote import...")
8323
        # The source cluster will stop the instance before attempting to make a
8324
        # connection. In some cases stopping an instance can take a long time,
8325
        # hence the shutdown timeout is added to the connection timeout.
8326
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8327
                           self.op.source_shutdown_timeout)
8328
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
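        # E.g. with a connect timeout of 60s (illustrative value for
        # constants.RIE_CONNECT_TIMEOUT) and a 120s source_shutdown_timeout,
        # the importing side keeps trying to connect for up to roughly 180s.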
8329

    
8330
        assert iobj.primary_node == self.pnode.name
8331
        disk_results = \
8332
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8333
                                        self.source_x509_ca,
8334
                                        self._cds, timeouts)
8335
        if not compat.all(disk_results):
8336
          # TODO: Should the instance still be started, even if some disks
8337
          # failed to import (valid for local imports, too)?
8338
          self.LogWarning("Some disks for instance %s on node %s were not"
8339
                          " imported successfully" % (instance, pnode_name))
8340

    
8341
        # Run rename script on newly imported instance
8342
        assert iobj.name == instance
8343
        feedback_fn("Running rename script for %s" % instance)
8344
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8345
                                                   self.source_instance_name,
8346
                                                   self.op.debug_level)
8347
        if result.fail_msg:
8348
          self.LogWarning("Failed to run rename script for %s on node"
8349
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8350

    
8351
      else:
8352
        # also checked in the prereq part
8353
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8354
                                     % self.op.mode)
8355

    
8356
    if self.op.start:
8357
      iobj.admin_up = True
8358
      self.cfg.Update(iobj, feedback_fn)
8359
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8360
      feedback_fn("* starting instance...")
8361
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8362
      result.Raise("Could not start instance")
8363

    
8364
    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
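# The dictionary returned above is the opcode result; the client side is
# presumably expected to rebuild a console object from it (e.g. via
# objects.InstanceConsole.FromDict) before using it. This describes the
# assumed caller, not something enforced here.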


class LUInstanceReplaceDisks(LogicalUnit):
8437
  """Replace the disks of an instance.
8438

8439
  """
8440
  HPATH = "mirrors-replace"
8441
  HTYPE = constants.HTYPE_INSTANCE
8442
  REQ_BGL = False
8443

    
8444
  def CheckArguments(self):
8445
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8446
                                  self.op.iallocator)
8447

    
8448
  def ExpandNames(self):
8449
    self._ExpandAndLockInstance()
8450

    
8451
    if self.op.iallocator is not None:
8452
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8453

    
8454
    elif self.op.remote_node is not None:
8455
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8456
      self.op.remote_node = remote_node
8457

    
8458
      # Warning: do not remove the locking of the new secondary here
8459
      # unless DRBD8.AddChildren is changed to work in parallel;
8460
      # currently it doesn't since parallel invocations of
8461
      # FindUnusedMinor will conflict
8462
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8463
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8464

    
8465
    else:
8466
      self.needed_locks[locking.LEVEL_NODE] = []
8467
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8468

    
8469
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8470
                                   self.op.iallocator, self.op.remote_node,
8471
                                   self.op.disks, False, self.op.early_release)
8472

    
8473
    self.tasklets = [self.replacer]
8474

    
8475
  def DeclareLocks(self, level):
8476
    # If we're not already locking all nodes in the set we have to declare the
8477
    # instance's primary/secondary nodes.
8478
    if (level == locking.LEVEL_NODE and
8479
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8480
      self._LockInstancesNodes()
8481

    
8482
  def BuildHooksEnv(self):
8483
    """Build hooks env.
8484

8485
    This runs on the master, the primary and all the secondaries.
8486

8487
    """
8488
    instance = self.replacer.instance
8489
    env = {
8490
      "MODE": self.op.mode,
8491
      "NEW_SECONDARY": self.op.remote_node,
8492
      "OLD_SECONDARY": instance.secondary_nodes[0],
8493
      }
8494
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8495
    return env
8496

    
8497
  def BuildHooksNodes(self):
8498
    """Build hooks nodes.
8499

8500
    """
8501
    instance = self.replacer.instance
8502
    nl = [
8503
      self.cfg.GetMasterNode(),
8504
      instance.primary_node,
8505
      ]
8506
    if self.op.remote_node is not None:
8507
      nl.append(self.op.remote_node)
8508
    return nl, nl
8509

    
8510

    
8511
class TLReplaceDisks(Tasklet):
8512
  """Replaces disks for an instance.
8513

8514
  Note: Locking is not within the scope of this class.
8515

8516
  """
8517
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8518
               disks, delay_iallocator, early_release):
8519
    """Initializes this class.
8520

8521
    """
8522
    Tasklet.__init__(self, lu)
8523

    
8524
    # Parameters
8525
    self.instance_name = instance_name
8526
    self.mode = mode
8527
    self.iallocator_name = iallocator_name
8528
    self.remote_node = remote_node
8529
    self.disks = disks
8530
    self.delay_iallocator = delay_iallocator
8531
    self.early_release = early_release
8532

    
8533
    # Runtime data
8534
    self.instance = None
8535
    self.new_node = None
8536
    self.target_node = None
8537
    self.other_node = None
8538
    self.remote_node_info = None
8539
    self.node_secondary_ip = None
8540

    
8541
  @staticmethod
8542
  def CheckArguments(mode, remote_node, iallocator):
8543
    """Helper function for users of this class.
8544

8545
    """
8546
    # check for valid parameter combination
8547
    if mode == constants.REPLACE_DISK_CHG:
8548
      if remote_node is None and iallocator is None:
8549
        raise errors.OpPrereqError("When changing the secondary either an"
8550
                                   " iallocator script must be used or the"
8551
                                   " new node given", errors.ECODE_INVAL)
8552

    
8553
      if remote_node is not None and iallocator is not None:
8554
        raise errors.OpPrereqError("Give either the iallocator or the new"
8555
                                   " secondary, not both", errors.ECODE_INVAL)
8556

    
8557
    elif remote_node is not None or iallocator is not None:
8558
      # Not replacing the secondary
8559
      raise errors.OpPrereqError("The iallocator and new node options can"
8560
                                 " only be used when changing the"
8561
                                 " secondary node", errors.ECODE_INVAL)
8562

    
8563
  @staticmethod
8564
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8565
    """Compute a new secondary node using an IAllocator.
8566

8567
    """
8568
    ial = IAllocator(lu.cfg, lu.rpc,
8569
                     mode=constants.IALLOCATOR_MODE_RELOC,
8570
                     name=instance_name,
8571
                     relocate_from=relocate_from)
8572

    
8573
    ial.Run(iallocator_name)
8574

    
8575
    if not ial.success:
8576
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8577
                                 " %s" % (iallocator_name, ial.info),
8578
                                 errors.ECODE_NORES)
8579

    
8580
    if len(ial.result) != ial.required_nodes:
8581
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8582
                                 " of nodes (%s), required %s" %
8583
                                 (iallocator_name,
8584
                                  len(ial.result), ial.required_nodes),
8585
                                 errors.ECODE_FAULT)
8586

    
8587
    remote_node_name = ial.result[0]
8588

    
8589
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8590
               instance_name, remote_node_name)
8591

    
8592
    return remote_node_name
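  # In IALLOCATOR_MODE_RELOC the allocator is expected to return exactly one
  # node for a DRBD8 secondary change (ial.required_nodes presumably being 1),
  # which is why only ial.result[0] is consumed above.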
8593

    
8594
  def _FindFaultyDisks(self, node_name):
8595
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8596
                                    node_name, True)
8597

    
8598
  def _CheckDisksActivated(self, instance):
8599
    """Checks if the instance disks are activated.
8600

8601
    @param instance: The instance to check disks
8602
    @return: True if they are activated, False otherwise
8603

8604
    """
8605
    nodes = instance.all_nodes
8606

    
8607
    for idx, dev in enumerate(instance.disks):
8608
      for node in nodes:
8609
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8610
        self.cfg.SetDiskID(dev, node)
8611

    
8612
        result = self.rpc.call_blockdev_find(node, dev)
8613

    
8614
        if result.offline:
8615
          continue
8616
        elif result.fail_msg or not result.payload:
8617
          return False
8618

    
8619
    return True
8620

    
8621

    
8622
  def CheckPrereq(self):
8623
    """Check prerequisites.
8624

8625
    This checks that the instance is in the cluster.
8626

8627
    """
8628
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8629
    assert instance is not None, \
8630
      "Cannot retrieve locked instance %s" % self.instance_name
8631

    
8632
    if instance.disk_template != constants.DT_DRBD8:
8633
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8634
                                 " instances", errors.ECODE_INVAL)
8635

    
8636
    if len(instance.secondary_nodes) != 1:
8637
      raise errors.OpPrereqError("The instance has a strange layout,"
8638
                                 " expected one secondary but found %d" %
8639
                                 len(instance.secondary_nodes),
8640
                                 errors.ECODE_FAULT)
8641

    
8642
    if not self.delay_iallocator:
8643
      self._CheckPrereq2()
8644

    
8645
  def _CheckPrereq2(self):
8646
    """Check prerequisites, second part.
8647

8648
    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because, during node evacuation, the iallocator would
    otherwise only see an unmodified cluster model, not taking planned changes
    into account.
8652

8653
    """
8654
    instance = self.instance
8655
    secondary_node = instance.secondary_nodes[0]
8656

    
8657
    if self.iallocator_name is None:
8658
      remote_node = self.remote_node
8659
    else:
8660
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8661
                                       instance.name, instance.secondary_nodes)
8662

    
8663
    if remote_node is not None:
8664
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8665
      assert self.remote_node_info is not None, \
8666
        "Cannot retrieve locked node %s" % remote_node
8667
    else:
8668
      self.remote_node_info = None
8669

    
8670
    if remote_node == self.instance.primary_node:
8671
      raise errors.OpPrereqError("The specified node is the primary node of"
8672
                                 " the instance.", errors.ECODE_INVAL)
8673

    
8674
    if remote_node == secondary_node:
8675
      raise errors.OpPrereqError("The specified node is already the"
8676
                                 " secondary node of the instance.",
8677
                                 errors.ECODE_INVAL)
8678

    
8679
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8680
                                    constants.REPLACE_DISK_CHG):
8681
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8682
                                 errors.ECODE_INVAL)
8683

    
8684
    if self.mode == constants.REPLACE_DISK_AUTO:
8685
      if not self._CheckDisksActivated(instance):
8686
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
8687
                                   " first" % self.instance_name,
8688
                                   errors.ECODE_STATE)
8689
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8690
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8691

    
8692
      if faulty_primary and faulty_secondary:
8693
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8694
                                   " one node and can not be repaired"
8695
                                   " automatically" % self.instance_name,
8696
                                   errors.ECODE_STATE)
8697

    
8698
      if faulty_primary:
8699
        self.disks = faulty_primary
8700
        self.target_node = instance.primary_node
8701
        self.other_node = secondary_node
8702
        check_nodes = [self.target_node, self.other_node]
8703
      elif faulty_secondary:
8704
        self.disks = faulty_secondary
8705
        self.target_node = secondary_node
8706
        self.other_node = instance.primary_node
8707
        check_nodes = [self.target_node, self.other_node]
8708
      else:
8709
        self.disks = []
8710
        check_nodes = []
8711

    
8712
    else:
8713
      # Non-automatic modes
8714
      if self.mode == constants.REPLACE_DISK_PRI:
8715
        self.target_node = instance.primary_node
8716
        self.other_node = secondary_node
8717
        check_nodes = [self.target_node, self.other_node]
8718

    
8719
      elif self.mode == constants.REPLACE_DISK_SEC:
8720
        self.target_node = secondary_node
8721
        self.other_node = instance.primary_node
8722
        check_nodes = [self.target_node, self.other_node]
8723

    
8724
      elif self.mode == constants.REPLACE_DISK_CHG:
8725
        self.new_node = remote_node
8726
        self.other_node = instance.primary_node
8727
        self.target_node = secondary_node
8728
        check_nodes = [self.new_node, self.other_node]
8729

    
8730
        _CheckNodeNotDrained(self.lu, remote_node)
8731
        _CheckNodeVmCapable(self.lu, remote_node)
8732

    
8733
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8734
        assert old_node_info is not None
8735
        if old_node_info.offline and not self.early_release:
8736
          # doesn't make sense to delay the release
8737
          self.early_release = True
8738
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8739
                          " early-release mode", secondary_node)
8740

    
8741
      else:
8742
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8743
                                     self.mode)
8744

    
8745
      # If not specified all disks should be replaced
8746
      if not self.disks:
8747
        self.disks = range(len(self.instance.disks))
8748

    
8749
    for node in check_nodes:
8750
      _CheckNodeOnline(self.lu, node)
8751

    
8752
    # Check whether disks are valid
8753
    for disk_idx in self.disks:
8754
      instance.FindDisk(disk_idx)
8755

    
8756
    # Get secondary node IP addresses
8757
    node_2nd_ip = {}
8758

    
8759
    for node_name in [self.target_node, self.other_node, self.new_node]:
8760
      if node_name is not None:
8761
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8762

    
8763
    self.node_secondary_ip = node_2nd_ip
8764

    
8765
  def Exec(self, feedback_fn):
8766
    """Execute disk replacement.
8767

8768
    This dispatches the disk replacement to the appropriate handler.
8769

8770
    """
8771
    if self.delay_iallocator:
8772
      self._CheckPrereq2()
8773

    
8774
    if not self.disks:
8775
      feedback_fn("No disks need replacement")
8776
      return
8777

    
8778
    feedback_fn("Replacing disk(s) %s for %s" %
8779
                (utils.CommaJoin(self.disks), self.instance.name))
8780

    
8781
    activate_disks = (not self.instance.admin_up)
8782

    
8783
    # Activate the instance disks if we're replacing them on a down instance
8784
    if activate_disks:
8785
      _StartInstanceDisks(self.lu, self.instance, True)
8786

    
8787
    try:
8788
      # Should we replace the secondary node?
8789
      if self.new_node is not None:
8790
        fn = self._ExecDrbd8Secondary
8791
      else:
8792
        fn = self._ExecDrbd8DiskOnly
8793

    
8794
      return fn(feedback_fn)
8795

    
8796
    finally:
8797
      # Deactivate the instance disks if we're replacing them on a
8798
      # down instance
8799
      if activate_disks:
8800
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8801

    
8802
  def _CheckVolumeGroup(self, nodes):
8803
    self.lu.LogInfo("Checking volume groups")
8804

    
8805
    vgname = self.cfg.GetVGName()
8806

    
8807
    # Make sure volume group exists on all involved nodes
8808
    results = self.rpc.call_vg_list(nodes)
8809
    if not results:
8810
      raise errors.OpExecError("Can't list volume groups on the nodes")
8811

    
8812
    for node in nodes:
8813
      res = results[node]
8814
      res.Raise("Error checking node %s" % node)
8815
      if vgname not in res.payload:
8816
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8817
                                 (vgname, node))
8818

    
8819
  def _CheckDisksExistence(self, nodes):
8820
    # Check disk existence
8821
    for idx, dev in enumerate(self.instance.disks):
8822
      if idx not in self.disks:
8823
        continue
8824

    
8825
      for node in nodes:
8826
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8827
        self.cfg.SetDiskID(dev, node)
8828

    
8829
        result = self.rpc.call_blockdev_find(node, dev)
8830

    
8831
        msg = result.fail_msg
8832
        if msg or not result.payload:
8833
          if not msg:
8834
            msg = "disk not found"
8835
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8836
                                   (idx, node, msg))
8837

    
8838
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8839
    for idx, dev in enumerate(self.instance.disks):
8840
      if idx not in self.disks:
8841
        continue
8842

    
8843
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8844
                      (idx, node_name))
8845

    
8846
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8847
                                   ldisk=ldisk):
8848
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8849
                                 " replace disks for instance %s" %
8850
                                 (node_name, self.instance.name))
8851

    
8852
  def _CreateNewStorage(self, node_name):
8853
    vgname = self.cfg.GetVGName()
8854
    iv_names = {}
8855

    
8856
    for idx, dev in enumerate(self.instance.disks):
8857
      if idx not in self.disks:
8858
        continue
8859

    
8860
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8861

    
8862
      self.cfg.SetDiskID(dev, node_name)
8863

    
8864
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8865
      names = _GenerateUniqueNames(self.lu, lv_names)
8866

    
8867
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8868
                             logical_id=(vgname, names[0]))
8869
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8870
                             logical_id=(vgname, names[1]))
8871

    
8872
      new_lvs = [lv_data, lv_meta]
8873
      old_lvs = dev.children
8874
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8875

    
8876
      # we pass force_create=True to force the LVM creation
8877
      for new_lv in new_lvs:
8878
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8879
                        _GetInstanceInfoText(self.instance), False)
8880

    
8881
    return iv_names
8882

    
8883
  def _CheckDevices(self, node_name, iv_names):
8884
    for name, (dev, _, _) in iv_names.iteritems():
8885
      self.cfg.SetDiskID(dev, node_name)
8886

    
8887
      result = self.rpc.call_blockdev_find(node_name, dev)
8888

    
8889
      msg = result.fail_msg
8890
      if msg or not result.payload:
8891
        if not msg:
8892
          msg = "disk not found"
8893
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8894
                                 (name, msg))
8895

    
8896
      if result.payload.is_degraded:
8897
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8898

    
8899
  def _RemoveOldStorage(self, node_name, iv_names):
8900
    for name, (_, old_lvs, _) in iv_names.iteritems():
8901
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8902

    
8903
      for lv in old_lvs:
8904
        self.cfg.SetDiskID(lv, node_name)
8905

    
8906
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8907
        if msg:
8908
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8909
                             hint="remove unused LVs manually")
8910

    
8911
  def _ReleaseNodeLock(self, node_name):
8912
    """Releases the lock for a given node."""
8913
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8914

    
8915
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8916
    """Replace a disk on the primary or secondary for DRBD 8.
8917

8918
    The algorithm for replace is quite complicated:
8919

8920
      1. for each disk to be replaced:
8921

8922
        1. create new LVs on the target node with unique names
8923
        1. detach old LVs from the drbd device
8924
        1. rename old LVs to name_replaced.<time_t>
8925
        1. rename new LVs to old LVs
8926
        1. attach the new LVs (with the old names now) to the drbd device
8927

8928
      1. wait for sync across all devices
8929

8930
      1. for each modified disk:
8931

8932
        1. remove old LVs (which have the name name_replaced.<time_t>)
8933

8934
    Failures are not very well handled.
8935

8936
    """
8937
    steps_total = 6
8938

    
8939
    # Step: check device activation
8940
    self.lu.LogStep(1, steps_total, "Check device existence")
8941
    self._CheckDisksExistence([self.other_node, self.target_node])
8942
    self._CheckVolumeGroup([self.target_node, self.other_node])
8943

    
8944
    # Step: check other node consistency
8945
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8946
    self._CheckDisksConsistency(self.other_node,
8947
                                self.other_node == self.instance.primary_node,
8948
                                False)
8949

    
8950
    # Step: create new storage
8951
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8952
    iv_names = self._CreateNewStorage(self.target_node)
8953

    
8954
    # Step: for each lv, detach+rename*2+attach
8955
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8956
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8957
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8958

    
8959
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8960
                                                     old_lvs)
8961
      result.Raise("Can't detach drbd from local storage on node"
8962
                   " %s for device %s" % (self.target_node, dev.iv_name))
8963
      #dev.children = []
8964
      #cfg.Update(instance)
8965

    
8966
      # ok, we created the new LVs, so now we know we have the needed
8967
      # storage; as such, we proceed on the target node to rename
8968
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8969
      # using the assumption that logical_id == physical_id (which in
8970
      # turn is the unique_id on that node)
8971

    
8972
      # FIXME(iustin): use a better name for the replaced LVs
8973
      temp_suffix = int(time.time())
8974
      ren_fn = lambda d, suff: (d.physical_id[0],
8975
                                d.physical_id[1] + "_replaced-%s" % suff)
8976

    
8977
      # Build the rename list based on what LVs exist on the node
8978
      rename_old_to_new = []
8979
      for to_ren in old_lvs:
8980
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8981
        if not result.fail_msg and result.payload:
8982
          # device exists
8983
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8984

    
8985
      self.lu.LogInfo("Renaming the old LVs on the target node")
8986
      result = self.rpc.call_blockdev_rename(self.target_node,
8987
                                             rename_old_to_new)
8988
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8989

    
8990
      # Now we rename the new LVs to the old LVs
8991
      self.lu.LogInfo("Renaming the new LVs on the target node")
8992
      rename_new_to_old = [(new, old.physical_id)
8993
                           for old, new in zip(old_lvs, new_lvs)]
8994
      result = self.rpc.call_blockdev_rename(self.target_node,
8995
                                             rename_new_to_old)
8996
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8997

    
8998
      for old, new in zip(old_lvs, new_lvs):
8999
        new.logical_id = old.logical_id
9000
        self.cfg.SetDiskID(new, self.target_node)
9001

    
9002
      for disk in old_lvs:
9003
        disk.logical_id = ren_fn(disk, temp_suffix)
9004
        self.cfg.SetDiskID(disk, self.target_node)
9005

    
9006
      # Now that the new lvs have the old name, we can add them to the device
9007
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9008
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9009
                                                  new_lvs)
9010
      msg = result.fail_msg
9011
      if msg:
9012
        for new_lv in new_lvs:
9013
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9014
                                               new_lv).fail_msg
9015
          if msg2:
9016
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9017
                               hint=("cleanup manually the unused logical"
9018
                                     "volumes"))
9019
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9020

    
9021
      dev.children = new_lvs
9022

    
9023
      self.cfg.Update(self.instance, feedback_fn)
9024

    
9025
    cstep = 5
9026
    if self.early_release:
9027
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9028
      cstep += 1
9029
      self._RemoveOldStorage(self.target_node, iv_names)
9030
      # WARNING: we release both node locks here, do not do other RPCs
9031
      # than WaitForSync to the primary node
9032
      self._ReleaseNodeLock([self.target_node, self.other_node])
9033

    
9034
    # Wait for sync
9035
    # This can fail as the old devices are degraded and _WaitForSync
9036
    # does a combined result over all disks, so we don't check its return value
9037
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9038
    cstep += 1
9039
    _WaitForSync(self.lu, self.instance)
9040

    
9041
    # Check all devices manually
9042
    self._CheckDevices(self.instance.primary_node, iv_names)
9043

    
9044
    # Step: remove old storage
9045
    if not self.early_release:
9046
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9047
      cstep += 1
9048
      self._RemoveOldStorage(self.target_node, iv_names)
9049

    
9050
  def _ExecDrbd8Secondary(self, feedback_fn):
9051
    """Replace the secondary node for DRBD 8.
9052

9053
    The algorithm for replace is quite complicated:
9054
      - for all disks of the instance:
9055
        - create new LVs on the new node with same names
9056
        - shutdown the drbd device on the old secondary
9057
        - disconnect the drbd network on the primary
9058
        - create the drbd device on the new secondary
9059
        - network attach the drbd on the primary, using an artifice:
9060
          the drbd code for Attach() will connect to the network if it
9061
          finds a device which is connected to the good local disks but
9062
          not network enabled
9063
      - wait for sync across all devices
9064
      - remove all disks from the old secondary
9065

9066
    Failures are not very well handled.
9067

9068
    """
9069
    steps_total = 6
9070

    
9071
    # Step: check device activation
9072
    self.lu.LogStep(1, steps_total, "Check device existence")
9073
    self._CheckDisksExistence([self.instance.primary_node])
9074
    self._CheckVolumeGroup([self.instance.primary_node])
9075

    
9076
    # Step: check other node consistency
9077
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9078
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9079

    
9080
    # Step: create new storage
9081
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9082
    for idx, dev in enumerate(self.instance.disks):
9083
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9084
                      (self.new_node, idx))
9085
      # we pass force_create=True to force LVM creation
9086
      for new_lv in dev.children:
9087
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9088
                        _GetInstanceInfoText(self.instance), False)
9089

    
9090
    # Step 4: drbd minors and drbd setup changes
9091
    # after this, we must manually remove the drbd minors on both the
9092
    # error and the success paths
9093
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9094
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9095
                                         for dev in self.instance.disks],
9096
                                        self.instance.name)
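    # one new minor is requested on the new node for every disk of the
    # instance, hence the (otherwise unused) iteration over self.instance.disks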
9097
    logging.debug("Allocated minors %r", minors)
9098

    
9099
    iv_names = {}
9100
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9101
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9102
                      (self.new_node, idx))
9103
      # create new devices on new_node; note that we create two IDs:
9104
      # one without port, so the drbd will be activated without
9105
      # networking information on the new node at this stage, and one
9106
      # with network, for the latter activation in step 4
9107
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9108
      if self.instance.primary_node == o_node1:
9109
        p_minor = o_minor1
9110
      else:
9111
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9112
        p_minor = o_minor2
9113

    
9114
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9115
                      p_minor, new_minor, o_secret)
9116
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9117
                    p_minor, new_minor, o_secret)
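      # Both tuples follow the LD_DRBD8 logical_id layout
      # (nodeA, nodeB, port, minorA, minorB, secret); new_alone_id carries no
      # port on purpose, so the new device first comes up standalone.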
9118

    
9119
      iv_names[idx] = (dev, dev.children, new_net_id)
9120
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9121
                    new_net_id)
9122
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9123
                              logical_id=new_alone_id,
9124
                              children=dev.children,
9125
                              size=dev.size)
9126
      try:
9127
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9128
                              _GetInstanceInfoText(self.instance), False)
9129
      except errors.GenericError:
9130
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9131
        raise
9132

    
9133
    # We have new devices, shutdown the drbd on the old secondary
9134
    for idx, dev in enumerate(self.instance.disks):
9135
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9136
      self.cfg.SetDiskID(dev, self.target_node)
9137
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9138
      if msg:
9139
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9140
                           "node: %s" % (idx, msg),
9141
                           hint=("Please cleanup this device manually as"
9142
                                 " soon as possible"))
9143

    
9144
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9145
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9146
                                               self.node_secondary_ip,
9147
                                               self.instance.disks)\
9148
                                              [self.instance.primary_node]
9149

    
9150
    msg = result.fail_msg
9151
    if msg:
9152
      # detaches didn't succeed (unlikely)
9153
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9154
      raise errors.OpExecError("Can't detach the disks from the network on"
9155
                               " old node: %s" % (msg,))
9156

    
9157
    # if we managed to detach at least one, we update all the disks of
9158
    # the instance to point to the new secondary
9159
    self.lu.LogInfo("Updating instance configuration")
9160
    for dev, _, new_logical_id in iv_names.itervalues():
9161
      dev.logical_id = new_logical_id
9162
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9163

    
9164
    self.cfg.Update(self.instance, feedback_fn)
9165

    
9166
    # and now perform the drbd attach
9167
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9168
                    " (standalone => connected)")
9169
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9170
                                            self.new_node],
9171
                                           self.node_secondary_ip,
9172
                                           self.instance.disks,
9173
                                           self.instance.name,
9174
                                           False)
9175
    for to_node, to_result in result.items():
9176
      msg = to_result.fail_msg
9177
      if msg:
9178
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9179
                           to_node, msg,
9180
                           hint=("please do a gnt-instance info to see the"
9181
                                 " status of disks"))
9182
    cstep = 5
9183
    if self.early_release:
9184
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9185
      cstep += 1
9186
      self._RemoveOldStorage(self.target_node, iv_names)
9187
      # WARNING: we release all node locks here, do not do other RPCs
9188
      # than WaitForSync to the primary node
9189
      self._ReleaseNodeLock([self.instance.primary_node,
9190
                             self.target_node,
9191
                             self.new_node])
9192

    
9193
    # Wait for sync
9194
    # This can fail as the old devices are degraded and _WaitForSync
9195
    # does a combined result over all disks, so we don't check its return value
9196
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9197
    cstep += 1
9198
    _WaitForSync(self.lu, self.instance)
9199

    
9200
    # Check all devices manually
9201
    self._CheckDevices(self.instance.primary_node, iv_names)
9202

    
9203
    # Step: remove old storage
9204
    if not self.early_release:
9205
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9206
      self._RemoveOldStorage(self.target_node, iv_names)
9207

    
9208

    
9209
class LURepairNodeStorage(NoHooksLU):
9210
  """Repairs the volume group on a node.
9211

9212
  """
9213
  REQ_BGL = False
9214

    
9215
  def CheckArguments(self):
9216
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9217

    
9218
    storage_type = self.op.storage_type
9219

    
9220
    if (constants.SO_FIX_CONSISTENCY not in
9221
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9222
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9223
                                 " repaired" % storage_type,
9224
                                 errors.ECODE_INVAL)
9225

    
9226
  def ExpandNames(self):
9227
    self.needed_locks = {
9228
      locking.LEVEL_NODE: [self.op.node_name],
9229
      }
9230

    
9231
  def _CheckFaultyDisks(self, instance, node_name):
9232
    """Ensure faulty disks abort the opcode or at least warn."""
9233
    try:
9234
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9235
                                  node_name, True):
9236
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9237
                                   " node '%s'" % (instance.name, node_name),
9238
                                   errors.ECODE_STATE)
9239
    except errors.OpPrereqError, err:
9240
      if self.op.ignore_consistency:
9241
        self.proc.LogWarning(str(err.args[0]))
9242
      else:
9243
        raise
9244

    
9245
  def CheckPrereq(self):
9246
    """Check prerequisites.
9247

9248
    """
9249
    # Check whether any instance on this node has faulty disks
9250
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9251
      if not inst.admin_up:
9252
        continue
9253
      check_nodes = set(inst.all_nodes)
9254
      check_nodes.discard(self.op.node_name)
9255
      for inst_node_name in check_nodes:
9256
        self._CheckFaultyDisks(inst, inst_node_name)
9257

    
9258
  def Exec(self, feedback_fn):
9259
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9260
                (self.op.name, self.op.node_name))
9261

    
9262
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9263
    result = self.rpc.call_storage_execute(self.op.node_name,
9264
                                           self.op.storage_type, st_args,
9265
                                           self.op.name,
9266
                                           constants.SO_FIX_CONSISTENCY)
9267
    result.Raise("Failed to repair storage unit '%s' on %s" %
9268
                 (self.op.name, self.op.node_name))


class LUNodeEvacStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


class LUInstanceGrowDisk(LogicalUnit):
9316
  """Grow a disk of an instance.
9317

9318
  """
9319
  HPATH = "disk-grow"
9320
  HTYPE = constants.HTYPE_INSTANCE
9321
  REQ_BGL = False
9322

    
9323
  def ExpandNames(self):
9324
    self._ExpandAndLockInstance()
9325
    self.needed_locks[locking.LEVEL_NODE] = []
9326
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9327

    
9328
  def DeclareLocks(self, level):
9329
    if level == locking.LEVEL_NODE:
9330
      self._LockInstancesNodes()
9331

    
9332
  def BuildHooksEnv(self):
9333
    """Build hooks env.
9334

9335
    This runs on the master, the primary and all the secondaries.
9336

9337
    """
9338
    env = {
9339
      "DISK": self.op.disk,
9340
      "AMOUNT": self.op.amount,
9341
      }
9342
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9343
    return env
9344

    
9345
  def BuildHooksNodes(self):
9346
    """Build hooks nodes.
9347

9348
    """
9349
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9350
    return (nl, nl)
9351

    
9352
  def CheckPrereq(self):
9353
    """Check prerequisites.
9354

9355
    This checks that the instance is in the cluster.
9356

9357
    """
9358
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9359
    assert instance is not None, \
9360
      "Cannot retrieve locked instance %s" % self.op.instance_name
9361
    nodenames = list(instance.all_nodes)
9362
    for node in nodenames:
9363
      _CheckNodeOnline(self, node)
9364

    
9365
    self.instance = instance
9366

    
9367
    if instance.disk_template not in constants.DTS_GROWABLE:
9368
      raise errors.OpPrereqError("Instance's disk layout does not support"
9369
                                 " growing.", errors.ECODE_INVAL)
9370

    
9371
    self.disk = instance.FindDisk(self.op.disk)
9372

    
9373
    if instance.disk_template not in (constants.DT_FILE,
9374
                                      constants.DT_SHARED_FILE):
9375
      # TODO: check the free disk space for file, when that feature will be
9376
      # supported
9377
      _CheckNodesFreeDiskPerVG(self, nodenames,
9378
                               self.disk.ComputeGrowth(self.op.amount))
9379

    
9380
  def Exec(self, feedback_fn):
9381
    """Execute disk grow.
9382

9383
    """
9384
    instance = self.instance
9385
    disk = self.disk
9386

    
9387
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9388
    if not disks_ok:
9389
      raise errors.OpExecError("Cannot activate block device to grow")
9390

    
9391
    for node in instance.all_nodes:
9392
      self.cfg.SetDiskID(disk, node)
9393
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9394
      result.Raise("Grow request failed to node %s" % node)
9395

    
9396
      # TODO: Rewrite code to work properly
9397
      # DRBD goes into sync mode for a short amount of time after executing the
9398
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9399
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9400
      # time is a work-around.
9401
      time.sleep(5)
9402

    
9403
    disk.RecordGrow(self.op.amount)
9404
    self.cfg.Update(instance, feedback_fn)
9405
    if self.op.wait_for_sync:
9406
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9407
      if disk_abort:
9408
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9409
                             " status.\nPlease check the instance.")
9410
      if not instance.admin_up:
9411
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9412
    elif not instance.admin_up:
9413
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9414
                           " not supposed to be running because no wait for"
9415
                           " sync mode was requested.")
9416

    
9417

    
9418
class LUInstanceQueryData(NoHooksLU):
9419
  """Query runtime instance data.
9420

9421
  """
9422
  REQ_BGL = False
9423

    
9424
  def ExpandNames(self):
9425
    self.needed_locks = {}
9426
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9427

    
9428
    if self.op.instances:
9429
      self.wanted_names = []
9430
      for name in self.op.instances:
9431
        full_name = _ExpandInstanceName(self.cfg, name)
9432
        self.wanted_names.append(full_name)
9433
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9434
    else:
9435
      self.wanted_names = None
9436
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9437

    
9438
    self.needed_locks[locking.LEVEL_NODE] = []
9439
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9440

    
9441
  def DeclareLocks(self, level):
9442
    if level == locking.LEVEL_NODE:
9443
      self._LockInstancesNodes()
9444

    
9445
  def CheckPrereq(self):
9446
    """Check prerequisites.
9447

9448
    This only checks the optional instance list against the existing names.
9449

9450
    """
9451
    if self.wanted_names is None:
9452
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9453

    
9454
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9455
                             in self.wanted_names]
9456

    
9457
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9458
    """Returns the status of a block device
9459

9460
    """
9461
    if self.op.static or not node:
9462
      return None
9463

    
9464
    self.cfg.SetDiskID(dev, node)
9465

    
9466
    result = self.rpc.call_blockdev_find(node, dev)
9467
    if result.offline:
9468
      return None
9469

    
9470
    result.Raise("Can't compute disk status for %s" % instance_name)
9471

    
9472
    status = result.payload
9473
    if status is None:
9474
      return None
9475

    
9476
    return (status.dev_path, status.major, status.minor,
9477
            status.sync_percent, status.estimated_time,
9478
            status.is_degraded, status.ldisk_status)
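    # Example of the tuple built above (values purely illustrative):
    #   ("/dev/drbd0", 147, 0, 95.2, 12, False, constants.LDS_OKAY)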
9479

    
9480
  def _ComputeDiskStatus(self, instance, snode, dev):
9481
    """Compute block device status.
9482

9483
    """
9484
    if dev.dev_type in constants.LDS_DRBD:
9485
      # we change the snode then (otherwise we use the one passed in)
9486
      if dev.logical_id[0] == instance.primary_node:
9487
        snode = dev.logical_id[1]
9488
      else:
9489
        snode = dev.logical_id[0]
9490

    
9491
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9492
                                              instance.name, dev)
9493
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9494

    
9495
    if dev.children:
9496
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9497
                      for child in dev.children]
9498
    else:
9499
      dev_children = []
9500

    
9501
    data = {
9502
      "iv_name": dev.iv_name,
9503
      "dev_type": dev.dev_type,
9504
      "logical_id": dev.logical_id,
9505
      "physical_id": dev.physical_id,
9506
      "pstatus": dev_pstatus,
9507
      "sstatus": dev_sstatus,
9508
      "children": dev_children,
9509
      "mode": dev.mode,
9510
      "size": dev.size,
9511
      }
9512

    
9513
    return data
9514

    
9515
  def Exec(self, feedback_fn):
9516
    """Gather and return data"""
9517
    result = {}
9518

    
9519
    cluster = self.cfg.GetClusterInfo()
9520

    
9521
    for instance in self.wanted_instances:
9522
      if not self.op.static:
9523
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9524
                                                  instance.name,
9525
                                                  instance.hypervisor)
9526
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9527
        remote_info = remote_info.payload
9528
        if remote_info and "state" in remote_info:
9529
          remote_state = "up"
9530
        else:
9531
          remote_state = "down"
9532
      else:
9533
        remote_state = None
9534
      if instance.admin_up:
9535
        config_state = "up"
9536
      else:
9537
        config_state = "down"
9538

    
9539
      disks = [self._ComputeDiskStatus(instance, None, device)
9540
               for device in instance.disks]
9541

    
9542
      idict = {
9543
        "name": instance.name,
9544
        "config_state": config_state,
9545
        "run_state": remote_state,
9546
        "pnode": instance.primary_node,
9547
        "snodes": instance.secondary_nodes,
9548
        "os": instance.os,
9549
        # this happens to be the same format used for hooks
9550
        "nics": _NICListToTuple(self, instance.nics),
9551
        "disk_template": instance.disk_template,
9552
        "disks": disks,
9553
        "hypervisor": instance.hypervisor,
9554
        "network_port": instance.network_port,
9555
        "hv_instance": instance.hvparams,
9556
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9557
        "be_instance": instance.beparams,
9558
        "be_actual": cluster.FillBE(instance),
9559
        "os_instance": instance.osparams,
9560
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9561
        "serial_no": instance.serial_no,
9562
        "mtime": instance.mtime,
9563
        "ctime": instance.ctime,
9564
        "uuid": instance.uuid,
9565
        }
9566

    
9567
      result[instance.name] = idict
9568

    
9569
    return result
9570

    
9571

    
9572
class LUInstanceSetParams(LogicalUnit):
9573
  """Modifies an instances's parameters.
9574

9575
  """
9576
  HPATH = "instance-modify"
9577
  HTYPE = constants.HTYPE_INSTANCE
9578
  REQ_BGL = False
9579

    
9580
  def CheckArguments(self):
9581
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9582
            self.op.hvparams or self.op.beparams or self.op.os_name):
9583
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9584

    
9585
    if self.op.hvparams:
9586
      _CheckGlobalHvParams(self.op.hvparams)
9587

    
9588
    # Disk validation
9589
    disk_addremove = 0
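    # self.op.disks is a list of (disk_op, disk_dict) pairs where disk_op is
    # constants.DDM_ADD, constants.DDM_REMOVE or the index of an existing disk,
    # e.g. [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})] (illustrative).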
9590
    for disk_op, disk_dict in self.op.disks:
9591
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9592
      if disk_op == constants.DDM_REMOVE:
9593
        disk_addremove += 1
9594
        continue
9595
      elif disk_op == constants.DDM_ADD:
9596
        disk_addremove += 1
9597
      else:
9598
        if not isinstance(disk_op, int):
9599
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9600
        if not isinstance(disk_dict, dict):
9601
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9602
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9603

    
9604
      if disk_op == constants.DDM_ADD:
9605
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9606
        if mode not in constants.DISK_ACCESS_SET:
9607
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9608
                                     errors.ECODE_INVAL)
9609
        size = disk_dict.get(constants.IDISK_SIZE, None)
9610
        if size is None:
9611
          raise errors.OpPrereqError("Required disk parameter size missing",
9612
                                     errors.ECODE_INVAL)
9613
        try:
9614
          size = int(size)
9615
        except (TypeError, ValueError), err:
9616
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9617
                                     str(err), errors.ECODE_INVAL)
9618
        disk_dict[constants.IDISK_SIZE] = size
9619
      else:
9620
        # modification of disk
9621
        if constants.IDISK_SIZE in disk_dict:
9622
          raise errors.OpPrereqError("Disk size change not possible, use"
9623
                                     " grow-disk", errors.ECODE_INVAL)
9624

    
9625
    if disk_addremove > 1:
9626
      raise errors.OpPrereqError("Only one disk add or remove operation"
9627
                                 " supported at a time", errors.ECODE_INVAL)
9628

    
9629
    if self.op.disks and self.op.disk_template is not None:
9630
      raise errors.OpPrereqError("Disk template conversion and other disk"
9631
                                 " changes not supported at the same time",
9632
                                 errors.ECODE_INVAL)
9633

    
9634
    if (self.op.disk_template and
9635
        self.op.disk_template in constants.DTS_INT_MIRROR and
9636
        self.op.remote_node is None):
9637
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9638
                                 " one requires specifying a secondary node",
9639
                                 errors.ECODE_INVAL)
9640

    
9641
    # NIC validation
9642
    nic_addremove = 0
9643
    for nic_op, nic_dict in self.op.nics:
9644
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9645
      if nic_op == constants.DDM_REMOVE:
9646
        nic_addremove += 1
9647
        continue
9648
      elif nic_op == constants.DDM_ADD:
9649
        nic_addremove += 1
9650
      else:
9651
        if not isinstance(nic_op, int):
9652
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9653
        if not isinstance(nic_dict, dict):
9654
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9655
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9656

    
9657
      # nic_dict should be a dict
9658
      nic_ip = nic_dict.get(constants.INIC_IP, None)
9659
      if nic_ip is not None:
9660
        if nic_ip.lower() == constants.VALUE_NONE:
9661
          nic_dict[constants.INIC_IP] = None
9662
        else:
9663
          if not netutils.IPAddress.IsValid(nic_ip):
9664
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9665
                                       errors.ECODE_INVAL)
9666

    
9667
      nic_bridge = nic_dict.get('bridge', None)
9668
      nic_link = nic_dict.get(constants.INIC_LINK, None)
9669
      if nic_bridge and nic_link:
9670
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9671
                                   " at the same time", errors.ECODE_INVAL)
9672
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9673
        nic_dict['bridge'] = None
9674
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9675
        nic_dict[constants.INIC_LINK] = None
9676

    
9677
      if nic_op == constants.DDM_ADD:
9678
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
9679
        if nic_mac is None:
9680
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9681

    
9682
      if constants.INIC_MAC in nic_dict:
9683
        nic_mac = nic_dict[constants.INIC_MAC]
9684
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9685
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9686

    
9687
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9688
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9689
                                     " modifying an existing nic",
9690
                                     errors.ECODE_INVAL)
9691

    
9692
    if nic_addremove > 1:
9693
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9694
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

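  # BuildHooksEnv below overrides parts of the standard instance hook
  # environment with the *new* values: "memory" and "vcpus" when beparams
  # change, and "nics" as a list of (ip, mac, mode, link) tuples reflecting
  # the requested NIC changes.  A rough sketch of the override dict (the
  # values are illustrative, not taken from a real cluster):
  #
  #   {"memory": 512, "vcpus": 2,
  #    "nics": [("198.51.100.10", "aa:00:00:11:22:33", "bridged", "xen-br0")]}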
  def BuildHooksEnv(self):
9709
    """Build hooks env.
9710

9711
    This runs on the master, primary and secondaries.
9712

9713
    """
9714
    args = dict()
9715
    if constants.BE_MEMORY in self.be_new:
9716
      args['memory'] = self.be_new[constants.BE_MEMORY]
9717
    if constants.BE_VCPUS in self.be_new:
9718
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9719
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9720
    # information at all.
9721
    if self.op.nics:
9722
      args['nics'] = []
9723
      nic_override = dict(self.op.nics)
9724
      for idx, nic in enumerate(self.instance.nics):
9725
        if idx in nic_override:
9726
          this_nic_override = nic_override[idx]
9727
        else:
9728
          this_nic_override = {}
9729
        if constants.INIC_IP in this_nic_override:
9730
          ip = this_nic_override[constants.INIC_IP]
9731
        else:
9732
          ip = nic.ip
9733
        if constants.INIC_MAC in this_nic_override:
9734
          mac = this_nic_override[constants.INIC_MAC]
9735
        else:
9736
          mac = nic.mac
9737
        if idx in self.nic_pnew:
9738
          nicparams = self.nic_pnew[idx]
9739
        else:
9740
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9741
        mode = nicparams[constants.NIC_MODE]
9742
        link = nicparams[constants.NIC_LINK]
9743
        args['nics'].append((ip, mac, mode, link))
9744
      if constants.DDM_ADD in nic_override:
9745
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9746
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9747
        nicparams = self.nic_pnew[constants.DDM_ADD]
9748
        mode = nicparams[constants.NIC_MODE]
9749
        link = nicparams[constants.NIC_LINK]
9750
        args['nics'].append((ip, mac, mode, link))
9751
      elif constants.DDM_REMOVE in nic_override:
9752
        del args['nics'][-1]
9753

    
9754
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9755
    if self.op.disk_template:
9756
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9757

    
9758
    return env
9759

    
9760
  def BuildHooksNodes(self):
9761
    """Build hooks nodes.
9762

9763
    """
9764
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9765
    return (nl, nl)
9766

    
9767
  def CheckPrereq(self):
9768
    """Check prerequisites.
9769

9770
    This only checks the instance list against the existing names.
9771

9772
    """
9773
    # checking the new params on the primary/secondary nodes
9774

    
9775
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9776
    cluster = self.cluster = self.cfg.GetClusterInfo()
9777
    assert self.instance is not None, \
9778
      "Cannot retrieve locked instance %s" % self.op.instance_name
9779
    pnode = instance.primary_node
9780
    nodelist = list(instance.all_nodes)
9781

    
9782
    # OS change
9783
    if self.op.os_name and not self.op.force:
9784
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9785
                      self.op.force_variant)
9786
      instance_os = self.op.os_name
9787
    else:
9788
      instance_os = instance.os
9789

    
9790
    if self.op.disk_template:
9791
      if instance.disk_template == self.op.disk_template:
9792
        raise errors.OpPrereqError("Instance already has disk template %s" %
9793
                                   instance.disk_template, errors.ECODE_INVAL)
9794

    
9795
      if (instance.disk_template,
9796
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9797
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9798
                                   " %s to %s" % (instance.disk_template,
9799
                                                  self.op.disk_template),
9800
                                   errors.ECODE_INVAL)
9801
      _CheckInstanceDown(self, instance, "cannot change disk template")
9802
      if self.op.disk_template in constants.DTS_INT_MIRROR:
9803
        if self.op.remote_node == pnode:
9804
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9805
                                     " as the primary node of the instance" %
9806
                                     self.op.remote_node, errors.ECODE_STATE)
9807
        _CheckNodeOnline(self, self.op.remote_node)
9808
        _CheckNodeNotDrained(self, self.op.remote_node)
9809
        # FIXME: here we assume that the old instance type is DT_PLAIN
9810
        assert instance.disk_template == constants.DT_PLAIN
9811
        disks = [{constants.IDISK_SIZE: d.size,
9812
                  constants.IDISK_VG: d.logical_id[0]}
9813
                 for d in instance.disks]
9814
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9815
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9816

    
9817
    # hvparams processing
9818
    if self.op.hvparams:
9819
      hv_type = instance.hypervisor
9820
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9821
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9822
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9823

    
9824
      # local check
9825
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9826
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9827
      self.hv_new = hv_new # the new actual values
9828
      self.hv_inst = i_hvdict # the new dict (without defaults)
9829
    else:
9830
      self.hv_new = self.hv_inst = {}
9831

    
9832
    # beparams processing
9833
    if self.op.beparams:
9834
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9835
                                   use_none=True)
9836
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9837
      be_new = cluster.SimpleFillBE(i_bedict)
9838
      self.be_new = be_new # the new actual values
9839
      self.be_inst = i_bedict # the new dict (without defaults)
9840
    else:
9841
      self.be_new = self.be_inst = {}
9842

    
9843
    # osparams processing
9844
    if self.op.osparams:
9845
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9846
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9847
      self.os_inst = i_osdict # the new dict (without defaults)
9848
    else:
9849
      self.os_inst = {}
9850

    
9851
    self.warn = []
9852

    
9853
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9854
      mem_check_list = [pnode]
9855
      if be_new[constants.BE_AUTO_BALANCE]:
9856
        # either we changed auto_balance to yes or it was from before
9857
        mem_check_list.extend(instance.secondary_nodes)
9858
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9859
                                                  instance.hypervisor)
9860
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9861
                                         instance.hypervisor)
9862
      pninfo = nodeinfo[pnode]
9863
      msg = pninfo.fail_msg
9864
      if msg:
9865
        # Assume the primary node is unreachable and go ahead
9866
        self.warn.append("Can't get info from primary node %s: %s" %
9867
                         (pnode,  msg))
9868
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9869
        self.warn.append("Node data from primary node %s doesn't contain"
9870
                         " free memory information" % pnode)
9871
      elif instance_info.fail_msg:
9872
        self.warn.append("Can't get instance runtime information: %s" %
9873
                        instance_info.fail_msg)
9874
      else:
9875
        if instance_info.payload:
9876
          current_mem = int(instance_info.payload['memory'])
9877
        else:
9878
          # Assume instance not running
9879
          # (there is a slight race condition here, but it's not very probable,
9880
          # and we have no other way to check)
9881
          current_mem = 0
9882
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9883
                    pninfo.payload['memory_free'])
9884
        if miss_mem > 0:
9885
          raise errors.OpPrereqError("This change will prevent the instance"
9886
                                     " from starting, due to %d MB of memory"
9887
                                     " missing on its primary node" % miss_mem,
9888
                                     errors.ECODE_NORES)
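        # Worked example for the check above (numbers are illustrative): with
        # a new BE_MEMORY of 4096 MB, an instance currently using 1024 MB and
        # a primary node reporting 2048 MB free,
        #   miss_mem = 4096 - 1024 - 2048 = 1024 > 0
        # so the change is refused with ECODE_NORES; the whole check is
        # skipped when the operation is forced.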
9889

    
9890
      if be_new[constants.BE_AUTO_BALANCE]:
9891
        for node, nres in nodeinfo.items():
9892
          if node not in instance.secondary_nodes:
9893
            continue
9894
          msg = nres.fail_msg
9895
          if msg:
9896
            self.warn.append("Can't get info from secondary node %s: %s" %
9897
                             (node, msg))
9898
          elif not isinstance(nres.payload.get('memory_free', None), int):
9899
            self.warn.append("Secondary node %s didn't return free"
9900
                             " memory information" % node)
9901
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9902
            self.warn.append("Not enough memory to failover instance to"
9903
                             " secondary node %s" % node)
9904

    
9905
    # NIC processing
9906
    self.nic_pnew = {}
9907
    self.nic_pinst = {}
9908
    for nic_op, nic_dict in self.op.nics:
9909
      if nic_op == constants.DDM_REMOVE:
9910
        if not instance.nics:
9911
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9912
                                     errors.ECODE_INVAL)
9913
        continue
9914
      if nic_op != constants.DDM_ADD:
9915
        # an existing nic
9916
        if not instance.nics:
9917
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9918
                                     " no NICs" % nic_op,
9919
                                     errors.ECODE_INVAL)
9920
        if nic_op < 0 or nic_op >= len(instance.nics):
9921
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9922
                                     " are 0 to %d" %
9923
                                     (nic_op, len(instance.nics) - 1),
9924
                                     errors.ECODE_INVAL)
9925
        old_nic_params = instance.nics[nic_op].nicparams
9926
        old_nic_ip = instance.nics[nic_op].ip
9927
      else:
9928
        old_nic_params = {}
9929
        old_nic_ip = None
9930

    
9931
      update_params_dict = dict([(key, nic_dict[key])
9932
                                 for key in constants.NICS_PARAMETERS
9933
                                 if key in nic_dict])
9934

    
9935
      if 'bridge' in nic_dict:
9936
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9937

    
9938
      new_nic_params = _GetUpdatedParams(old_nic_params,
9939
                                         update_params_dict)
9940
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9941
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9942
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9943
      self.nic_pinst[nic_op] = new_nic_params
9944
      self.nic_pnew[nic_op] = new_filled_nic_params
9945
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9946

    
9947
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9948
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9949
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9950
        if msg:
9951
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9952
          if self.op.force:
9953
            self.warn.append(msg)
9954
          else:
9955
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9956
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9957
        if constants.INIC_IP in nic_dict:
9958
          nic_ip = nic_dict[constants.INIC_IP]
9959
        else:
9960
          nic_ip = old_nic_ip
9961
        if nic_ip is None:
9962
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9963
                                     ' on a routed nic', errors.ECODE_INVAL)
9964
      if constants.INIC_MAC in nic_dict:
9965
        nic_mac = nic_dict[constants.INIC_MAC]
9966
        if nic_mac is None:
9967
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9968
                                     errors.ECODE_INVAL)
9969
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9970
          # otherwise generate the mac
9971
          nic_dict[constants.INIC_MAC] = \
9972
            self.cfg.GenerateMAC(self.proc.GetECId())
9973
        else:
9974
          # or validate/reserve the current one
9975
          try:
9976
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9977
          except errors.ReservationError:
9978
            raise errors.OpPrereqError("MAC address %s already in use"
9979
                                       " in cluster" % nic_mac,
9980
                                       errors.ECODE_NOTUNIQUE)
9981

    
9982
    # DISK processing
9983
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9984
      raise errors.OpPrereqError("Disk operations not supported for"
9985
                                 " diskless instances",
9986
                                 errors.ECODE_INVAL)
9987
    for disk_op, _ in self.op.disks:
9988
      if disk_op == constants.DDM_REMOVE:
9989
        if len(instance.disks) == 1:
9990
          raise errors.OpPrereqError("Cannot remove the last disk of"
9991
                                     " an instance", errors.ECODE_INVAL)
9992
        _CheckInstanceDown(self, instance, "cannot remove disks")
9993

    
9994
      if (disk_op == constants.DDM_ADD and
9995
          len(instance.disks) >= constants.MAX_DISKS):
9996
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9997
                                   " add more" % constants.MAX_DISKS,
9998
                                   errors.ECODE_STATE)
9999
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10000
        # an existing disk
10001
        if disk_op < 0 or disk_op >= len(instance.disks):
10002
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10003
                                     " are 0 to %d" %
10004
                                     (disk_op, len(instance.disks)),
10005
                                     errors.ECODE_INVAL)
10006

    
10007
    return
10008

    
10009
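  # Sketch of the plain -> drbd8 conversion implemented below: the missing
  # volumes are created first (the meta LV on the primary, data and meta LVs
  # on the new secondary), then each original LV is renamed so that it becomes
  # the primary's data volume, and a DRBD8 device is layered on top:
  #
  #   before:  disk/0 = plain LV on the primary node
  #   after:   disk/0 = DRBD8 device mirrored to self.op.remote_node,
  #            children = [data LV (the renamed original), meta LV]
  #
  # Finally the new disks are waited upon until the initial sync completes.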
  def _ConvertPlainToDrbd(self, feedback_fn):
10010
    """Converts an instance from plain to drbd.
10011

10012
    """
10013
    feedback_fn("Converting template to drbd")
10014
    instance = self.instance
10015
    pnode = instance.primary_node
10016
    snode = self.op.remote_node
10017

    
10018
    # create a fake disk info for _GenerateDiskTemplate
10019
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode}
10020
                 for d in instance.disks]
10021
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10022
                                      instance.name, pnode, [snode],
10023
                                      disk_info, None, None, 0, feedback_fn)
10024
    info = _GetInstanceInfoText(instance)
10025
    feedback_fn("Creating aditional volumes...")
10026
    # first, create the missing data and meta devices
10027
    for disk in new_disks:
10028
      # unfortunately this is... not too nice
10029
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10030
                            info, True)
10031
      for child in disk.children:
10032
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10033
    # at this stage, all new LVs have been created, we can rename the
10034
    # old ones
10035
    feedback_fn("Renaming original volumes...")
10036
    rename_list = [(o, n.children[0].logical_id)
10037
                   for (o, n) in zip(instance.disks, new_disks)]
10038
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10039
    result.Raise("Failed to rename original LVs")
10040

    
10041
    feedback_fn("Initializing DRBD devices...")
10042
    # all child devices are in place, we can now create the DRBD devices
10043
    for disk in new_disks:
10044
      for node in [pnode, snode]:
10045
        f_create = node == pnode
10046
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10047

    
10048
    # at this point, the instance has been modified
10049
    instance.disk_template = constants.DT_DRBD8
10050
    instance.disks = new_disks
10051
    self.cfg.Update(instance, feedback_fn)
10052

    
10053
    # disks are created, waiting for sync
10054
    disk_abort = not _WaitForSync(self, instance)
10055
    if disk_abort:
10056
      raise errors.OpExecError("There are some degraded disks for"
10057
                               " this instance, please cleanup manually")
10058

    
10059
  def _ConvertDrbdToPlain(self, feedback_fn):
10060
    """Converts an instance from drbd to plain.
10061

10062
    """
10063
    instance = self.instance
10064
    assert len(instance.secondary_nodes) == 1
10065
    pnode = instance.primary_node
10066
    snode = instance.secondary_nodes[0]
10067
    feedback_fn("Converting template to plain")
10068

    
10069
    old_disks = instance.disks
10070
    new_disks = [d.children[0] for d in old_disks]
10071

    
10072
    # copy over size and mode
10073
    for parent, child in zip(old_disks, new_disks):
10074
      child.size = parent.size
10075
      child.mode = parent.mode
10076

    
10077
    # update instance structure
10078
    instance.disks = new_disks
10079
    instance.disk_template = constants.DT_PLAIN
10080
    self.cfg.Update(instance, feedback_fn)
10081

    
10082
    feedback_fn("Removing volumes on the secondary node...")
10083
    for disk in old_disks:
10084
      self.cfg.SetDiskID(disk, snode)
10085
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10086
      if msg:
10087
        self.LogWarning("Could not remove block device %s on node %s,"
10088
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10089

    
10090
    feedback_fn("Removing unneeded volumes on the primary node...")
10091
    for idx, disk in enumerate(old_disks):
10092
      meta = disk.children[1]
10093
      self.cfg.SetDiskID(meta, pnode)
10094
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10095
      if msg:
10096
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10097
                        " continuing anyway: %s", idx, pnode, msg)
10098

    
10099
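  # Exec below returns the applied changes as a list of (parameter, new value)
  # pairs, for example (values are illustrative):
  #
  #   [("disk/1", "add:size=1024,mode=rw"),
  #    ("nic.ip/0", "198.51.100.10"),
  #    ("be/memory", 512)]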
  def Exec(self, feedback_fn):
10100
    """Modifies an instance.
10101

10102
    All parameters take effect only at the next restart of the instance.
10103

10104
    """
10105
    # Process here the warnings from CheckPrereq, as we don't have a
10106
    # feedback_fn there.
10107
    for warn in self.warn:
10108
      feedback_fn("WARNING: %s" % warn)
10109

    
10110
    result = []
10111
    instance = self.instance
10112
    # disk changes
10113
    for disk_op, disk_dict in self.op.disks:
10114
      if disk_op == constants.DDM_REMOVE:
10115
        # remove the last disk
10116
        device = instance.disks.pop()
10117
        device_idx = len(instance.disks)
10118
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10119
          self.cfg.SetDiskID(disk, node)
10120
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10121
          if msg:
10122
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10123
                            " continuing anyway", device_idx, node, msg)
10124
        result.append(("disk/%d" % device_idx, "remove"))
10125
      elif disk_op == constants.DDM_ADD:
10126
        # add a new disk
10127
        if instance.disk_template in (constants.DT_FILE,
10128
                                        constants.DT_SHARED_FILE):
10129
          file_driver, file_path = instance.disks[0].logical_id
10130
          file_path = os.path.dirname(file_path)
10131
        else:
10132
          file_driver = file_path = None
10133
        disk_idx_base = len(instance.disks)
10134
        new_disk = _GenerateDiskTemplate(self,
10135
                                         instance.disk_template,
10136
                                         instance.name, instance.primary_node,
10137
                                         instance.secondary_nodes,
10138
                                         [disk_dict],
10139
                                         file_path,
10140
                                         file_driver,
10141
                                         disk_idx_base, feedback_fn)[0]
10142
        instance.disks.append(new_disk)
10143
        info = _GetInstanceInfoText(instance)
10144

    
10145
        logging.info("Creating volume %s for instance %s",
10146
                     new_disk.iv_name, instance.name)
10147
        # Note: this needs to be kept in sync with _CreateDisks
10148
        #HARDCODE
10149
        for node in instance.all_nodes:
10150
          f_create = node == instance.primary_node
10151
          try:
10152
            _CreateBlockDev(self, node, instance, new_disk,
10153
                            f_create, info, f_create)
10154
          except errors.OpExecError, err:
10155
            self.LogWarning("Failed to create volume %s (%s) on"
10156
                            " node %s: %s",
10157
                            new_disk.iv_name, new_disk, node, err)
10158
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10159
                       (new_disk.size, new_disk.mode)))
10160
      else:
10161
        # change a given disk
10162
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10163
        result.append(("disk.mode/%d" % disk_op,
10164
                       disk_dict[constants.IDISK_MODE]))
10165

    
10166
    if self.op.disk_template:
10167
      r_shut = _ShutdownInstanceDisks(self, instance)
10168
      if not r_shut:
10169
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10170
                                 " proceed with disk template conversion")
10171
      mode = (instance.disk_template, self.op.disk_template)
10172
      try:
10173
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10174
      except:
10175
        self.cfg.ReleaseDRBDMinors(instance.name)
10176
        raise
10177
      result.append(("disk_template", self.op.disk_template))
10178

    
10179
    # NIC changes
10180
    for nic_op, nic_dict in self.op.nics:
10181
      if nic_op == constants.DDM_REMOVE:
10182
        # remove the last nic
10183
        del instance.nics[-1]
10184
        result.append(("nic.%d" % len(instance.nics), "remove"))
10185
      elif nic_op == constants.DDM_ADD:
10186
        # mac and bridge should be set, by now
10187
        mac = nic_dict[constants.INIC_MAC]
10188
        ip = nic_dict.get(constants.INIC_IP, None)
10189
        nicparams = self.nic_pinst[constants.DDM_ADD]
10190
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10191
        instance.nics.append(new_nic)
10192
        result.append(("nic.%d" % (len(instance.nics) - 1),
10193
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10194
                       (new_nic.mac, new_nic.ip,
10195
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10196
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10197
                       )))
10198
      else:
10199
        for key in (constants.INIC_MAC, constants.INIC_IP):
10200
          if key in nic_dict:
10201
            setattr(instance.nics[nic_op], key, nic_dict[key])
10202
        if nic_op in self.nic_pinst:
10203
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10204
        for key, val in nic_dict.iteritems():
10205
          result.append(("nic.%s/%d" % (key, nic_op), val))
10206

    
10207
    # hvparams changes
10208
    if self.op.hvparams:
10209
      instance.hvparams = self.hv_inst
10210
      for key, val in self.op.hvparams.iteritems():
10211
        result.append(("hv/%s" % key, val))
10212

    
10213
    # beparams changes
10214
    if self.op.beparams:
10215
      instance.beparams = self.be_inst
10216
      for key, val in self.op.beparams.iteritems():
10217
        result.append(("be/%s" % key, val))
10218

    
10219
    # OS change
10220
    if self.op.os_name:
10221
      instance.os = self.op.os_name
10222

    
10223
    # osparams changes
10224
    if self.op.osparams:
10225
      instance.osparams = self.os_inst
10226
      for key, val in self.op.osparams.iteritems():
10227
        result.append(("os/%s" % key, val))
10228

    
10229
    self.cfg.Update(instance, feedback_fn)
10230

    
10231
    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


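# LUBackupQuery below returns a per-node view of the export directories, e.g.
# (node and instance names are illustrative):
#
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}    # False: the node could not be queried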
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


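# For remote exports, LUBackupPrepare above hands the client the material it
# must feed back into the export opcode: an import/export handshake, an
# "x509_key_name" triple of (key_name, HMAC of the key name under the cluster
# domain secret, salt) and the signed X509 CA ("x509_ca").  LUBackupExport
# below re-verifies the HMAC and the CA signature against the cluster domain
# secret before streaming the disks to the remote destinations.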
class LUBackupExport(LogicalUnit):
10327
  """Export an instance to an image in the cluster.
10328

10329
  """
10330
  HPATH = "instance-export"
10331
  HTYPE = constants.HTYPE_INSTANCE
10332
  REQ_BGL = False
10333

    
10334
  def CheckArguments(self):
10335
    """Check the arguments.
10336

10337
    """
10338
    self.x509_key_name = self.op.x509_key_name
10339
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10340

    
10341
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10342
      if not self.x509_key_name:
10343
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10344
                                   errors.ECODE_INVAL)
10345

    
10346
      if not self.dest_x509_ca_pem:
10347
        raise errors.OpPrereqError("Missing destination X509 CA",
10348
                                   errors.ECODE_INVAL)
10349

    
10350
  def ExpandNames(self):
10351
    self._ExpandAndLockInstance()
10352

    
10353
    # Lock all nodes for local exports
10354
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10355
      # FIXME: lock only instance primary and destination node
10356
      #
10357
      # Sad but true, for now we have to lock all nodes, as we don't know where
10358
      # the previous export might be, and in this LU we search for it and
10359
      # remove it from its current node. In the future we could fix this by:
10360
      #  - making a tasklet to search (share-lock all), then create the
10361
      #    new one, then one to remove, after
10362
      #  - removing the removal operation altogether
10363
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10364

    
10365
  def DeclareLocks(self, level):
10366
    """Last minute lock declaration."""
10367
    # All nodes are locked anyway, so nothing to do here.
10368

    
10369
  def BuildHooksEnv(self):
10370
    """Build hooks env.
10371

10372
    This will run on the master, primary node and target node.
10373

10374
    """
10375
    env = {
10376
      "EXPORT_MODE": self.op.mode,
10377
      "EXPORT_NODE": self.op.target_node,
10378
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10379
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10380
      # TODO: Generic function for boolean env variables
10381
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10382
      }
10383

    
10384
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10385

    
10386
    return env
10387

    
10388
  def BuildHooksNodes(self):
10389
    """Build hooks nodes.
10390

10391
    """
10392
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10393

    
10394
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10395
      nl.append(self.op.target_node)
10396

    
10397
    return (nl, nl)
10398

    
10399
  def CheckPrereq(self):
10400
    """Check prerequisites.
10401

10402
    This checks that the instance and node names are valid.
10403

10404
    """
10405
    instance_name = self.op.instance_name
10406

    
10407
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10408
    assert self.instance is not None, \
10409
          "Cannot retrieve locked instance %s" % self.op.instance_name
10410
    _CheckNodeOnline(self, self.instance.primary_node)
10411

    
10412
    if (self.op.remove_instance and self.instance.admin_up and
10413
        not self.op.shutdown):
10414
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10415
                                 " down before")
10416

    
10417
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10418
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10419
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10420
      assert self.dst_node is not None
10421

    
10422
      _CheckNodeOnline(self, self.dst_node.name)
10423
      _CheckNodeNotDrained(self, self.dst_node.name)
10424

    
10425
      self._cds = None
10426
      self.dest_disk_info = None
10427
      self.dest_x509_ca = None
10428

    
10429
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10430
      self.dst_node = None
10431

    
10432
      if len(self.op.target_node) != len(self.instance.disks):
10433
        raise errors.OpPrereqError(("Received destination information for %s"
10434
                                    " disks, but instance %s has %s disks") %
10435
                                   (len(self.op.target_node), instance_name,
10436
                                    len(self.instance.disks)),
10437
                                   errors.ECODE_INVAL)
10438

    
10439
      cds = _GetClusterDomainSecret()
10440

    
10441
      # Check X509 key name
10442
      try:
10443
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10444
      except (TypeError, ValueError), err:
10445
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10446

    
10447
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10448
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10449
                                   errors.ECODE_INVAL)
10450

    
10451
      # Load and verify CA
10452
      try:
10453
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10454
      except OpenSSL.crypto.Error, err:
10455
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10456
                                   (err, ), errors.ECODE_INVAL)
10457

    
10458
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10459
      if errcode is not None:
10460
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10461
                                   (msg, ), errors.ECODE_INVAL)
10462

    
10463
      self.dest_x509_ca = cert
10464

    
10465
      # Verify target information
10466
      disk_info = []
10467
      for idx, disk_data in enumerate(self.op.target_node):
10468
        try:
10469
          (host, port, magic) = \
10470
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10471
        except errors.GenericError, err:
10472
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10473
                                     (idx, err), errors.ECODE_INVAL)
10474

    
10475
        disk_info.append((host, port, magic))
10476

    
10477
      assert len(disk_info) == len(self.op.target_node)
10478
      self.dest_disk_info = disk_info
10479

    
10480
    else:
10481
      raise errors.ProgrammerError("Unhandled export mode %r" %
10482
                                   self.op.mode)
10483

    
10484
    # instance disk type verification
10485
    # TODO: Implement export support for file-based disks
10486
    for disk in self.instance.disks:
10487
      if disk.dev_type == constants.LD_FILE:
10488
        raise errors.OpPrereqError("Export not supported for instances with"
10489
                                   " file-based disks", errors.ECODE_INVAL)
10490

    
10491
  def _CleanupExports(self, feedback_fn):
10492
    """Removes exports of current instance from all other nodes.
10493

10494
    If an instance in a cluster with nodes A..D was exported to node C, its
10495
    exports will be removed from the nodes A, B and D.
10496

10497
    """
10498
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10499

    
10500
    nodelist = self.cfg.GetNodeList()
10501
    nodelist.remove(self.dst_node.name)
10502

    
10503
    # on one-node clusters nodelist will be empty after the removal
10504
    # if we proceed the backup would be removed because OpBackupQuery
10505
    # substitutes an empty list with the full cluster node list.
10506
    iname = self.instance.name
10507
    if nodelist:
10508
      feedback_fn("Removing old exports for instance %s" % iname)
10509
      exportlist = self.rpc.call_export_list(nodelist)
10510
      for node in exportlist:
10511
        if exportlist[node].fail_msg:
10512
          continue
10513
        if iname in exportlist[node].payload:
10514
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10515
          if msg:
10516
            self.LogWarning("Could not remove older export for instance %s"
10517
                            " on node %s: %s", iname, node, msg)
10518

    
10519
  def Exec(self, feedback_fn):
10520
    """Export an instance to an image in the cluster.
10521

10522
    """
10523
    assert self.op.mode in constants.EXPORT_MODES
10524

    
10525
    instance = self.instance
10526
    src_node = instance.primary_node
10527

    
10528
    if self.op.shutdown:
10529
      # shutdown the instance, but not the disks
10530
      feedback_fn("Shutting down instance %s" % instance.name)
10531
      result = self.rpc.call_instance_shutdown(src_node, instance,
10532
                                               self.op.shutdown_timeout)
10533
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10534
      result.Raise("Could not shutdown instance %s on"
10535
                   " node %s" % (instance.name, src_node))
10536

    
10537
    # set the disks ID correctly since call_instance_start needs the
10538
    # correct drbd minor to create the symlinks
10539
    for disk in instance.disks:
10540
      self.cfg.SetDiskID(disk, src_node)
10541

    
10542
    activate_disks = (not instance.admin_up)
10543

    
10544
    if activate_disks:
10545
      # Activate the instance disks if we're exporting a stopped instance
10546
      feedback_fn("Activating disks for %s" % instance.name)
10547
      _StartInstanceDisks(self, instance, None)
10548

    
10549
    try:
10550
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10551
                                                     instance)
10552

    
10553
      helper.CreateSnapshots()
10554
      try:
10555
        if (self.op.shutdown and instance.admin_up and
10556
            not self.op.remove_instance):
10557
          assert not activate_disks
10558
          feedback_fn("Starting instance %s" % instance.name)
10559
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10560
          msg = result.fail_msg
10561
          if msg:
10562
            feedback_fn("Failed to start instance: %s" % msg)
10563
            _ShutdownInstanceDisks(self, instance)
10564
            raise errors.OpExecError("Could not start instance: %s" % msg)
10565

    
10566
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10567
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10568
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10569
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10570
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10571

    
10572
          (key_name, _, _) = self.x509_key_name
10573

    
10574
          dest_ca_pem = \
10575
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10576
                                            self.dest_x509_ca)
10577

    
10578
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10579
                                                     key_name, dest_ca_pem,
10580
                                                     timeouts)
10581
      finally:
10582
        helper.Cleanup()
10583

    
10584
      # Check for backwards compatibility
10585
      assert len(dresults) == len(instance.disks)
10586
      assert compat.all(isinstance(i, bool) for i in dresults), \
10587
             "Not all results are boolean: %r" % dresults
10588

    
10589
    finally:
10590
      if activate_disks:
10591
        feedback_fn("Deactivating disks for %s" % instance.name)
10592
        _ShutdownInstanceDisks(self, instance)
10593

    
10594
    if not (compat.all(dresults) and fin_resu):
10595
      failures = []
10596
      if not fin_resu:
10597
        failures.append("export finalization")
10598
      if not compat.all(dresults):
10599
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10600
                               if not dsk)
10601
        failures.append("disk export: disk(s) %s" % fdsk)
10602

    
10603
      raise errors.OpExecError("Export failed, errors in %s" %
10604
                               utils.CommaJoin(failures))
10605

    
10606
    # At this point, the export was successful, we can cleanup/finish
10607

    
10608
    # Remove instance if requested
10609
    if self.op.remove_instance:
10610
      feedback_fn("Removing instance %s" % instance.name)
10611
      _RemoveInstance(self, feedback_fn, instance,
10612
                      self.op.ignore_remove_failures)
10613

    
10614
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10615
      self._CleanupExports(feedback_fn)
10616

    
10617
    return fin_resu, dresults
10618

    
10619

    
10620
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


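# A rough sketch of the opcode consumed by LUGroupAdd below (the values are
# illustrative and the ndparams/alloc_policy fields are optional):
#
#   group_name="rack2",
#   alloc_policy=constants.ALLOC_POLICY_PREFERRED,
#   ndparams={constants.ND_OOB_PROGRAM: "/usr/bin/node-oob"}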
class LUGroupAdd(LogicalUnit):
10668
  """Logical unit for creating node groups.
10669

10670
  """
10671
  HPATH = "group-add"
10672
  HTYPE = constants.HTYPE_GROUP
10673
  REQ_BGL = False
10674

    
10675
  def ExpandNames(self):
10676
    # We need the new group's UUID here so that we can create and acquire the
10677
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10678
    # that it should not check whether the UUID exists in the configuration.
10679
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10680
    self.needed_locks = {}
10681
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10682

    
10683
  def CheckPrereq(self):
10684
    """Check prerequisites.
10685

10686
    This checks that the given group name is not an existing node group
10687
    already.
10688

10689
    """
10690
    try:
10691
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10692
    except errors.OpPrereqError:
10693
      pass
10694
    else:
10695
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10696
                                 " node group (UUID: %s)" %
10697
                                 (self.op.group_name, existing_uuid),
10698
                                 errors.ECODE_EXISTS)
10699

    
10700
    if self.op.ndparams:
10701
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
10719
    """Add the node group to the cluster.
10720

10721
    """
10722
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10723
                                  uuid=self.group_uuid,
10724
                                  alloc_policy=self.op.alloc_policy,
10725
                                  ndparams=self.op.ndparams)
10726

    
10727
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10728
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10729

    
10730

    
10731
class LUGroupAssignNodes(NoHooksLU):
10732
  """Logical unit for assigning nodes to groups.
10733

10734
  """
10735
  REQ_BGL = False
10736

    
10737
  def ExpandNames(self):
10738
    # These raise errors.OpPrereqError on their own:
10739
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10740
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10741

    
10742
    # We want to lock all the affected nodes and groups. We have readily
10743
    # available the list of nodes, and the *destination* group. To gather the
10744
    # list of "source" groups, we need to fetch node information.
10745
    self.node_data = self.cfg.GetAllNodesInfo()
10746
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10747
    affected_groups.add(self.group_uuid)
10748

    
10749
    self.needed_locks = {
10750
      locking.LEVEL_NODEGROUP: list(affected_groups),
10751
      locking.LEVEL_NODE: self.op.nodes,
10752
      }
10753

    
10754
  def CheckPrereq(self):
10755
    """Check prerequisites.
10756

10757
    """
10758
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10759
    instance_data = self.cfg.GetAllInstancesInfo()
10760

    
10761
    if self.group is None:
10762
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10763
                               (self.op.group_name, self.group_uuid))
10764

    
10765
    (new_splits, previous_splits) = \
10766
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10767
                                             for node in self.op.nodes],
10768
                                            self.node_data, instance_data)
10769

    
10770
    if new_splits:
10771
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10772

    
10773
      if not self.op.force:
10774
        raise errors.OpExecError("The following instances get split by this"
10775
                                 " change and --force was not given: %s" %
10776
                                 fmt_new_splits)
10777
      else:
10778
        self.LogWarning("This operation will split the following instances: %s",
10779
                        fmt_new_splits)
10780

    
10781
        if previous_splits:
10782
          self.LogWarning("In addition, these already-split instances continue"
10783
                          " to be spit across groups: %s",
10784
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

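  # Worked example for the split check below (names are illustrative): a
  # DRBD8 instance with its primary node in group A and its secondary in
  # group B counts as split.  If a proposed assignment moves only one of the
  # two nodes into another group, the instance ends up in the "new splits"
  # list; if it was already spanning two groups and remains so, it ends up in
  # the "previous splits" list instead.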
  @staticmethod
10796
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10797
    """Check for split instances after a node assignment.
10798

10799
    This method considers a series of node assignments as an atomic operation,
10800
    and returns information about split instances after applying the set of
10801
    changes.
10802

10803
    In particular, it returns information about newly split instances, and
10804
    instances that were already split, and remain so after the change.
10805

10806
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10807
    considered.
10808

10809
    @type changes: list of (node_name, new_group_uuid) pairs.
10810
    @param changes: list of node assignments to consider.
10811
    @param node_data: a dict with data for all nodes
10812
    @param instance_data: a dict with all instances to consider
10813
    @rtype: a two-tuple
10814
    @return: a list of instances that were previously whole and become split as a
      consequence of this change, and a list of instances that were previously
      split and that this change does not fix.
10817

10818
    """
10819
    changed_nodes = dict((node, group) for node, group in changes
10820
                         if node_data[node].group != group)
10821

    
10822
    all_split_instances = set()
10823
    previously_split_instances = set()
10824

    
10825
    def InstanceNodes(instance):
10826
      return [instance.primary_node] + list(instance.secondary_nodes)
10827

    
10828
    for inst in instance_data.values():
10829
      if inst.disk_template not in constants.DTS_INT_MIRROR:
10830
        continue
10831

    
10832
      instance_nodes = InstanceNodes(inst)
10833

    
10834
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
10835
        previously_split_instances.add(inst.name)
10836

    
10837
      if len(set(changed_nodes.get(node, node_data[node].group)
10838
                 for node in instance_nodes)) > 1:
10839
        all_split_instances.add(inst.name)
10840

    
10841
    return (list(all_split_instances - previously_split_instances),
10842
            list(previously_split_instances & all_split_instances))
10843

    
10844

    
10845
class _GroupQuery(_QueryBase):
10846
  FIELDS = query.GROUP_FIELDS
10847

    
10848
  def ExpandNames(self, lu):
10849
    lu.needed_locks = {}
10850

    
10851
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10852
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10853

    
10854
    if not self.names:
10855
      self.wanted = [name_to_uuid[name]
10856
                     for name in utils.NiceSort(name_to_uuid.keys())]
10857
    else:
10858
      # Accept names to be either names or UUIDs.
10859
      missing = []
10860
      self.wanted = []
10861
      all_uuid = frozenset(self._all_groups.keys())
10862

    
10863
      for name in self.names:
10864
        if name in all_uuid:
10865
          self.wanted.append(name)
10866
        elif name in name_to_uuid:
10867
          self.wanted.append(name_to_uuid[name])
10868
        else:
10869
          missing.append(name)
10870

    
10871
      if missing:
10872
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10873
                                   errors.ECODE_NOENT)
10874

    
10875
  def DeclareLocks(self, lu, level):
10876
    pass
10877

    
10878
  def _GetQueryData(self, lu):
10879
    """Computes the list of node groups and their attributes.
10880

10881
    """
10882
    do_nodes = query.GQ_NODE in self.requested_data
10883
    do_instances = query.GQ_INST in self.requested_data
10884

    
10885
    group_to_nodes = None
10886
    group_to_instances = None
10887

    
10888
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10889
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10890
    # latter GetAllInstancesInfo() is not enough, for we have to go through
10891
    # instance->node. Hence, we will need to process nodes even if we only need
10892
    # instance information.
10893
    if do_nodes or do_instances:
10894
      all_nodes = lu.cfg.GetAllNodesInfo()
10895
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10896
      node_to_group = {}
10897

    
10898
      for node in all_nodes.values():
10899
        if node.group in group_to_nodes:
10900
          group_to_nodes[node.group].append(node.name)
10901
          node_to_group[node.name] = node.group
10902

    
10903
      if do_instances:
10904
        all_instances = lu.cfg.GetAllInstancesInfo()
10905
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
10906

    
10907
        for instance in all_instances.values():
10908
          node = instance.primary_node
10909
          if node in node_to_group:
10910
            group_to_instances[node_to_group[node]].append(instance.name)
10911

    
10912
        if not do_nodes:
10913
          # Do not pass on node information if it was not requested.
10914
          group_to_nodes = None
10915

    
10916
    return query.GroupQueryData([self._all_groups[uuid]
10917
                                 for uuid in self.wanted],
10918
                                group_to_nodes, group_to_instances)
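# Illustrative sketch only (toy data, not a real cluster): the two mappings
# that _GetQueryData builds above.  group_to_nodes is a plain inversion of
# node->group, while group_to_instances additionally goes through each
# instance's primary node, which is why nodes are processed even when only
# instance data was requested.
def _ExampleGroupQueryMappings():
  node_group = {"node1": "uuid-a", "node2": "uuid-a", "node3": "uuid-b"}
  instance_pnode = {"inst1": "node1", "inst2": "node3"}

  group_to_nodes = {}
  for node, group in node_group.items():
    group_to_nodes.setdefault(group, []).append(node)

  group_to_instances = dict((group, []) for group in group_to_nodes)
  for inst, pnode in instance_pnode.items():
    group_to_instances[node_group[pnode]].append(inst)

  # E.g. group_to_nodes == {"uuid-a": ["node1", "node2"], "uuid-b": ["node3"]}
  # and group_to_instances == {"uuid-a": ["inst1"], "uuid-b": ["inst2"]}
  # (list order may vary with dict iteration order).
  return (group_to_nodes, group_to_instances)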
10919

    
10920

    
10921
class LUGroupQuery(NoHooksLU):
10922
  """Logical unit for querying node groups.
10923

10924
  """
10925
  REQ_BGL = False
10926

    
10927
  def CheckArguments(self):
10928
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10929
                          self.op.output_fields, False)
10930

    
10931
  def ExpandNames(self):
10932
    self.gq.ExpandNames(self)
10933

    
10934
  def Exec(self, feedback_fn):
10935
    return self.gq.OldStyleQuery(self)
10936

    
10937

    
10938
class LUGroupSetParams(LogicalUnit):
10939
  """Modifies the parameters of a node group.
10940

10941
  """
10942
  HPATH = "group-modify"
10943
  HTYPE = constants.HTYPE_GROUP
10944
  REQ_BGL = False
10945

    
10946
  def CheckArguments(self):
10947
    all_changes = [
10948
      self.op.ndparams,
10949
      self.op.alloc_policy,
10950
      ]
10951

    
10952
    if all_changes.count(None) == len(all_changes):
10953
      raise errors.OpPrereqError("Please pass at least one modification",
10954
                                 errors.ECODE_INVAL)
10955

    
10956
  def ExpandNames(self):
10957
    # This raises errors.OpPrereqError on its own:
10958
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10959

    
10960
    self.needed_locks = {
10961
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10962
      }
10963

    
10964
  def CheckPrereq(self):
10965
    """Check prerequisites.
10966

10967
    """
10968
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10969

    
10970
    if self.group is None:
10971
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10972
                               (self.op.group_name, self.group_uuid))
10973

    
10974
    if self.op.ndparams:
10975
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10976
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10977
      self.new_ndparams = new_ndparams
10978

    
10979
  def BuildHooksEnv(self):
10980
    """Build hooks env.
10981

10982
    """
10983
    return {
10984
      "GROUP_NAME": self.op.group_name,
10985
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
10986
      }
10987

    
10988
  def BuildHooksNodes(self):
10989
    """Build hooks nodes.
10990

10991
    """
10992
    mn = self.cfg.GetMasterNode()
10993
    return ([mn], [mn])
10994

    
10995
  def Exec(self, feedback_fn):
10996
    """Modifies the node group.
10997

10998
    """
10999
    result = []
11000

    
11001
    if self.op.ndparams:
11002
      self.group.ndparams = self.new_ndparams
11003
      result.append(("ndparams", str(self.group.ndparams)))
11004

    
11005
    if self.op.alloc_policy:
11006
      self.group.alloc_policy = self.op.alloc_policy
11007

    
11008
    self.cfg.Update(self.group, feedback_fn)
11009
    return result
11010

    
11011

    
11012

    
11013
class LUGroupRemove(LogicalUnit):
11014
  HPATH = "group-remove"
11015
  HTYPE = constants.HTYPE_GROUP
11016
  REQ_BGL = False
11017

    
11018
  def ExpandNames(self):
11019
    # This raises errors.OpPrereqError on its own:
11020
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11021
    self.needed_locks = {
11022
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11023
      }
11024

    
11025
  def CheckPrereq(self):
11026
    """Check prerequisites.
11027

11028
    This checks that the given group name exists as a node group, that it is
11029
    empty (i.e., contains no nodes), and that it is not the last group of the
11030
    cluster.
11031

11032
    """
11033
    # Verify that the group is empty.
11034
    group_nodes = [node.name
11035
                   for node in self.cfg.GetAllNodesInfo().values()
11036
                   if node.group == self.group_uuid]
11037

    
11038
    if group_nodes:
11039
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11040
                                 " nodes: %s" %
11041
                                 (self.op.group_name,
11042
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11043
                                 errors.ECODE_STATE)
11044

    
11045
    # Verify the cluster would not be left group-less.
11046
    if len(self.cfg.GetNodeGroupList()) == 1:
11047
      raise errors.OpPrereqError("Group '%s' is the only group,"
11048
                                 " cannot be removed" %
11049
                                 self.op.group_name,
11050
                                 errors.ECODE_STATE)
11051

    
11052
  def BuildHooksEnv(self):
11053
    """Build hooks env.
11054

11055
    """
11056
    return {
11057
      "GROUP_NAME": self.op.group_name,
11058
      }
11059

    
11060
  def BuildHooksNodes(self):
11061
    """Build hooks nodes.
11062

11063
    """
11064
    mn = self.cfg.GetMasterNode()
11065
    return ([mn], [mn])
11066

    
11067
  def Exec(self, feedback_fn):
11068
    """Remove the node group.
11069

11070
    """
11071
    try:
11072
      self.cfg.RemoveNodeGroup(self.group_uuid)
11073
    except errors.ConfigurationError:
11074
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11075
                               (self.op.group_name, self.group_uuid))
11076

    
11077
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11078

    
11079

    
11080
class LUGroupRename(LogicalUnit):
11081
  HPATH = "group-rename"
11082
  HTYPE = constants.HTYPE_GROUP
11083
  REQ_BGL = False
11084

    
11085
  def ExpandNames(self):
11086
    # This raises errors.OpPrereqError on its own:
11087
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11088

    
11089
    self.needed_locks = {
11090
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11091
      }
11092

    
11093
  def CheckPrereq(self):
11094
    """Check prerequisites.
11095

11096
    Ensures requested new name is not yet used.
11097

11098
    """
11099
    try:
11100
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11101
    except errors.OpPrereqError:
11102
      pass
11103
    else:
11104
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11105
                                 " node group (UUID: %s)" %
11106
                                 (self.op.new_name, new_name_uuid),
11107
                                 errors.ECODE_EXISTS)
11108

    
11109
  def BuildHooksEnv(self):
11110
    """Build hooks env.
11111

11112
    """
11113
    return {
11114
      "OLD_NAME": self.op.group_name,
11115
      "NEW_NAME": self.op.new_name,
11116
      }
11117

    
11118
  def BuildHooksNodes(self):
11119
    """Build hooks nodes.
11120

11121
    """
11122
    mn = self.cfg.GetMasterNode()
11123

    
11124
    all_nodes = self.cfg.GetAllNodesInfo()
11125
    all_nodes.pop(mn, None)
11126

    
11127
    run_nodes = [mn]
11128
    run_nodes.extend(node.name for node in all_nodes.values()
11129
                     if node.group == self.group_uuid)
11130

    
11131
    return (run_nodes, run_nodes)
11132

    
11133
  def Exec(self, feedback_fn):
11134
    """Rename the node group.
11135

11136
    """
11137
    group = self.cfg.GetNodeGroup(self.group_uuid)
11138

    
11139
    if group is None:
11140
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11141
                               (self.op.group_name, self.group_uuid))
11142

    
11143
    group.name = self.op.new_name
11144
    self.cfg.Update(group, feedback_fn)
11145

    
11146
    return self.op.new_name
11147

    
11148

    
11149
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11150
  """Generic tags LU.
11151

11152
  This is an abstract class which is the parent of all the other tags LUs.
11153

11154
  """
11155

    
11156
  def ExpandNames(self):
11157
    self.needed_locks = {}
11158
    if self.op.kind == constants.TAG_NODE:
11159
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11160
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
11161
    elif self.op.kind == constants.TAG_INSTANCE:
11162
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11163
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11164

    
11165
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11166
    # not possible to acquire the BGL based on opcode parameters)
11167

    
11168
  def CheckPrereq(self):
11169
    """Check prerequisites.
11170

11171
    """
11172
    if self.op.kind == constants.TAG_CLUSTER:
11173
      self.target = self.cfg.GetClusterInfo()
11174
    elif self.op.kind == constants.TAG_NODE:
11175
      self.target = self.cfg.GetNodeInfo(self.op.name)
11176
    elif self.op.kind == constants.TAG_INSTANCE:
11177
      self.target = self.cfg.GetInstanceInfo(self.op.name)
11178
    else:
11179
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11180
                                 str(self.op.kind), errors.ECODE_INVAL)
11181

    
11182

    
11183
class LUTagsGet(TagsLU):
11184
  """Returns the tags of a given object.
11185

11186
  """
11187
  REQ_BGL = False
11188

    
11189
  def ExpandNames(self):
11190
    TagsLU.ExpandNames(self)
11191

    
11192
    # Share locks as this is only a read operation
11193
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11194

    
11195
  def Exec(self, feedback_fn):
11196
    """Returns the tag list.
11197

11198
    """
11199
    return list(self.target.GetTags())
11200

    
11201

    
11202
class LUTagsSearch(NoHooksLU):
11203
  """Searches the tags for a given pattern.
11204

11205
  """
11206
  REQ_BGL = False
11207

    
11208
  def ExpandNames(self):
11209
    self.needed_locks = {}
11210

    
11211
  def CheckPrereq(self):
11212
    """Check prerequisites.
11213

11214
    This checks the given pattern for validity by compiling it.
11215

11216
    """
11217
    try:
11218
      self.re = re.compile(self.op.pattern)
11219
    except re.error, err:
11220
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11221
                                 (self.op.pattern, err), errors.ECODE_INVAL)
11222

    
11223
  def Exec(self, feedback_fn):
11224
    """Returns the tag list.
11225

11226
    """
11227
    cfg = self.cfg
11228
    tgts = [("/cluster", cfg.GetClusterInfo())]
11229
    ilist = cfg.GetAllInstancesInfo().values()
11230
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11231
    nlist = cfg.GetAllNodesInfo().values()
11232
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11233
    results = []
11234
    for path, target in tgts:
11235
      for tag in target.GetTags():
11236
        if self.re.search(tag):
11237
          results.append((path, tag))
11238
    return results
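# Illustrative sketch only (made-up tags): the (path, tag) pairs produced by
# LUTagsSearch.Exec above, reproduced over a hand-built target list so the
# result format is easy to see.
def _ExampleTagsSearch(pattern=r"^env:"):
  regex = re.compile(pattern)
  targets = [
    ("/cluster", ["env:prod", "owner:ops"]),
    ("/instances/web1.example.com", ["env:prod"]),
    ("/nodes/node2.example.com", ["env:staging", "hwgen:2009"]),
    ]
  # Returns e.g. [("/cluster", "env:prod"),
  #               ("/instances/web1.example.com", "env:prod"),
  #               ("/nodes/node2.example.com", "env:staging")]
  return [(path, tag)
          for (path, tags) in targets
          for tag in tags
          if regex.search(tag)]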
11239

    
11240

    
11241
class LUTagsSet(TagsLU):
11242
  """Sets a tag on a given object.
11243

11244
  """
11245
  REQ_BGL = False
11246

    
11247
  def CheckPrereq(self):
11248
    """Check prerequisites.
11249

11250
    This checks the type and length of the tags passed in.
11251

11252
    """
11253
    TagsLU.CheckPrereq(self)
11254
    for tag in self.op.tags:
11255
      objects.TaggableObject.ValidateTag(tag)
11256

    
11257
  def Exec(self, feedback_fn):
11258
    """Sets the tag.
11259

11260
    """
11261
    try:
11262
      for tag in self.op.tags:
11263
        self.target.AddTag(tag)
11264
    except errors.TagError, err:
11265
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
11266
    self.cfg.Update(self.target, feedback_fn)
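# Illustrative usage sketch only: the opcode that drives LUTagsSet above.  The
# opcode class name and its fields (kind, name, tags) are assumed from the
# LU's use of self.op; check the opcodes module before relying on this.
def _ExampleTagsSetOpcode():
  return opcodes.OpTagsSet(kind=constants.TAG_INSTANCE,
                           name="web1.example.com",
                           tags=["env:prod", "owner:ops"])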
11267

    
11268

    
11269
class LUTagsDel(TagsLU):
11270
  """Delete a list of tags from a given object.
11271

11272
  """
11273
  REQ_BGL = False
11274

    
11275
  def CheckPrereq(self):
11276
    """Check prerequisites.
11277

11278
    This checks that we have all of the given tags.
11279

11280
    """
11281
    TagsLU.CheckPrereq(self)
11282
    for tag in self.op.tags:
11283
      objects.TaggableObject.ValidateTag(tag)
11284
    del_tags = frozenset(self.op.tags)
11285
    cur_tags = self.target.GetTags()
11286

    
11287
    diff_tags = del_tags - cur_tags
11288
    if diff_tags:
11289
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11290
      raise errors.OpPrereqError("Tag(s) %s not found" %
11291
                                 (utils.CommaJoin(diff_names), ),
11292
                                 errors.ECODE_NOENT)
11293

    
11294
  def Exec(self, feedback_fn):
11295
    """Remove the tag from the object.
11296

11297
    """
11298
    for tag in self.op.tags:
11299
      self.target.RemoveTag(tag)
11300
    self.cfg.Update(self.target, feedback_fn)
11301

    
11302

    
11303
class LUTestDelay(NoHooksLU):
11304
  """Sleep for a specified amount of time.
11305

11306
  This LU sleeps on the master and/or nodes for a specified amount of
11307
  time.
11308

11309
  """
11310
  REQ_BGL = False
11311

    
11312
  def ExpandNames(self):
11313
    """Expand names and set required locks.
11314

11315
    This expands the node list, if any.
11316

11317
    """
11318
    self.needed_locks = {}
11319
    if self.op.on_nodes:
11320
      # _GetWantedNodes can be used here, but is not always appropriate to use
11321
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11322
      # more information.
11323
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11324
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11325

    
11326
  def _TestDelay(self):
11327
    """Do the actual sleep.
11328

11329
    """
11330
    if self.op.on_master:
11331
      if not utils.TestDelay(self.op.duration):
11332
        raise errors.OpExecError("Error during master delay test")
11333
    if self.op.on_nodes:
11334
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11335
      for node, node_result in result.items():
11336
        node_result.Raise("Failure during rpc call to node %s" % node)
11337

    
11338
  def Exec(self, feedback_fn):
11339
    """Execute the test delay opcode, with the wanted repetitions.
11340

11341
    """
11342
    if self.op.repeat == 0:
11343
      self._TestDelay()
11344
    else:
11345
      top_value = self.op.repeat - 1
11346
      for i in range(self.op.repeat):
11347
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11348
        self._TestDelay()
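# Illustrative usage sketch only: an opcode exercising LUTestDelay above.  The
# opcode class name and its fields (duration, on_master, on_nodes, repeat) are
# assumed from the LU's use of self.op.
def _ExampleTestDelayOpcode():
  # Sleep two seconds on the master only, without any repetitions.
  return opcodes.OpTestDelay(duration=2.0, on_master=True,
                             on_nodes=[], repeat=0)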
11349

    
11350

    
11351
class LUTestJqueue(NoHooksLU):
11352
  """Utility LU to test some aspects of the job queue.
11353

11354
  """
11355
  REQ_BGL = False
11356

    
11357
  # Must be lower than default timeout for WaitForJobChange to see whether it
11358
  # notices changed jobs
11359
  _CLIENT_CONNECT_TIMEOUT = 20.0
11360
  _CLIENT_CONFIRM_TIMEOUT = 60.0
11361

    
11362
  @classmethod
11363
  def _NotifyUsingSocket(cls, cb, errcls):
11364
    """Opens a Unix socket and waits for another program to connect.
11365

11366
    @type cb: callable
11367
    @param cb: Callback to send socket name to client
11368
    @type errcls: class
11369
    @param errcls: Exception class to use for errors
11370

11371
    """
11372
    # Using a temporary directory as there's no easy way to create temporary
11373
    # sockets without writing a custom loop around tempfile.mktemp and
11374
    # socket.bind
11375
    tmpdir = tempfile.mkdtemp()
11376
    try:
11377
      tmpsock = utils.PathJoin(tmpdir, "sock")
11378

    
11379
      logging.debug("Creating temporary socket at %s", tmpsock)
11380
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11381
      try:
11382
        sock.bind(tmpsock)
11383
        sock.listen(1)
11384

    
11385
        # Send details to client
11386
        cb(tmpsock)
11387

    
11388
        # Wait for client to connect before continuing
11389
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11390
        try:
11391
          (conn, _) = sock.accept()
11392
        except socket.error, err:
11393
          raise errcls("Client didn't connect in time (%s)" % err)
11394
      finally:
11395
        sock.close()
11396
    finally:
11397
      # Remove as soon as client is connected
11398
      shutil.rmtree(tmpdir)
11399

    
11400
    # Wait for client to close
11401
    try:
11402
      try:
11403
        # pylint: disable-msg=E1101
11404
        # Instance of '_socketobject' has no ... member
11405
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11406
        conn.recv(1)
11407
      except socket.error, err:
11408
        raise errcls("Client failed to confirm notification (%s)" % err)
11409
    finally:
11410
      conn.close()
11411

    
11412
  def _SendNotification(self, test, arg, sockname):
11413
    """Sends a notification to the client.
11414

11415
    @type test: string
11416
    @param test: Test name
11417
    @param arg: Test argument (depends on test)
11418
    @type sockname: string
11419
    @param sockname: Socket path
11420

11421
    """
11422
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11423

    
11424
  def _Notify(self, prereq, test, arg):
11425
    """Notifies the client of a test.
11426

11427
    @type prereq: bool
11428
    @param prereq: Whether this is a prereq-phase test
11429
    @type test: string
11430
    @param test: Test name
11431
    @param arg: Test argument (depends on test)
11432

11433
    """
11434
    if prereq:
11435
      errcls = errors.OpPrereqError
11436
    else:
11437
      errcls = errors.OpExecError
11438

    
11439
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11440
                                                  test, arg),
11441
                                   errcls)
11442

    
11443
  def CheckArguments(self):
11444
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11445
    self.expandnames_calls = 0
11446

    
11447
  def ExpandNames(self):
11448
    checkargs_calls = getattr(self, "checkargs_calls", 0)
11449
    if checkargs_calls < 1:
11450
      raise errors.ProgrammerError("CheckArguments was not called")
11451

    
11452
    self.expandnames_calls += 1
11453

    
11454
    if self.op.notify_waitlock:
11455
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
11456

    
11457
    self.LogInfo("Expanding names")
11458

    
11459
    # Get lock on master node (just to get a lock, not for a particular reason)
11460
    self.needed_locks = {
11461
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11462
      }
11463

    
11464
  def Exec(self, feedback_fn):
11465
    if self.expandnames_calls < 1:
11466
      raise errors.ProgrammerError("ExpandNames was not called")
11467

    
11468
    if self.op.notify_exec:
11469
      self._Notify(False, constants.JQT_EXEC, None)
11470

    
11471
    self.LogInfo("Executing")
11472

    
11473
    if self.op.log_messages:
11474
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11475
      for idx, msg in enumerate(self.op.log_messages):
11476
        self.LogInfo("Sending log message %s", idx + 1)
11477
        feedback_fn(constants.JQT_MSGPREFIX + msg)
11478
        # Report how many test messages have been sent
11479
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11480

    
11481
    if self.op.fail:
11482
      raise errors.OpExecError("Opcode failure was requested")
11483

    
11484
    return True
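# Illustrative sketch only (hypothetical client, not part of Ganeti): what the
# peer of LUTestJqueue._NotifyUsingSocket above is expected to do.  It must
# connect to the announced socket path within _CLIENT_CONNECT_TIMEOUT and then
# send a single byte, which is what conn.recv(1) waits for as confirmation.
def _ExampleJqueueTestClient(sockname):
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    client.connect(sockname)
    client.send("x")  # any single byte confirms the notification
  finally:
    client.close()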
11485

    
11486

    
11487
class IAllocator(object):
11488
  """IAllocator framework.
11489

11490
  An IAllocator instance has the following sets of attributes:
11491
    - cfg that is needed to query the cluster
11492
    - input data (all members of the _KEYS class attribute are required)
11493
    - four buffer attributes (in|out_data|text), that represent the
11494
      input (to the external script) in text and data structure format,
11495
      and the output from it, again in two formats
11496
    - the result variables from the script (success, info, result) for
11497
      easy usage
11498

11499
  """
11500
  # pylint: disable-msg=R0902
11501
  # lots of instance attributes
11502
  _ALLO_KEYS = [
11503
    "name", "mem_size", "disks", "disk_template",
11504
    "os", "tags", "nics", "vcpus", "hypervisor",
11505
    ]
11506
  _RELO_KEYS = [
11507
    "name", "relocate_from",
11508
    ]
11509
  _EVAC_KEYS = [
11510
    "evac_nodes",
11511
    ]
11512

    
11513
  def __init__(self, cfg, rpc, mode, **kwargs):
11514
    self.cfg = cfg
11515
    self.rpc = rpc
11516
    # init buffer variables
11517
    self.in_text = self.out_text = self.in_data = self.out_data = None
11518
    # init all input fields so that pylint is happy
11519
    self.mode = mode
11520
    self.mem_size = self.disks = self.disk_template = None
11521
    self.os = self.tags = self.nics = self.vcpus = None
11522
    self.hypervisor = None
11523
    self.relocate_from = None
11524
    self.name = None
11525
    self.evac_nodes = None
11526
    # computed fields
11527
    self.required_nodes = None
11528
    # init result fields
11529
    self.success = self.info = self.result = None
11530
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11531
      keyset = self._ALLO_KEYS
11532
      fn = self._AddNewInstance
11533
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11534
      keyset = self._RELO_KEYS
11535
      fn = self._AddRelocateInstance
11536
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11537
      keyset = self._EVAC_KEYS
11538
      fn = self._AddEvacuateNodes
11539
    else:
11540
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11541
                                   " IAllocator" % self.mode)
11542
    for key in kwargs:
11543
      if key not in keyset:
11544
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
11545
                                     " IAllocator" % key)
11546
      setattr(self, key, kwargs[key])
11547

    
11548
    for key in keyset:
11549
      if key not in kwargs:
11550
        raise errors.ProgrammerError("Missing input parameter '%s' to"
11551
                                     " IAllocator" % key)
11552
    self._BuildInputData(fn)
11553

    
11554
  def _ComputeClusterData(self):
11555
    """Compute the generic allocator input data.
11556

11557
    This is the data that is independent of the actual operation.
11558

11559
    """
11560
    cfg = self.cfg
11561
    cluster_info = cfg.GetClusterInfo()
11562
    # cluster data
11563
    data = {
11564
      "version": constants.IALLOCATOR_VERSION,
11565
      "cluster_name": cfg.GetClusterName(),
11566
      "cluster_tags": list(cluster_info.GetTags()),
11567
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11568
      # we don't have job IDs
11569
      }
11570
    ninfo = cfg.GetAllNodesInfo()
11571
    iinfo = cfg.GetAllInstancesInfo().values()
11572
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11573

    
11574
    # node data
11575
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
11576

    
11577
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11578
      hypervisor_name = self.hypervisor
11579
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11580
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11581
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11582
      hypervisor_name = cluster_info.enabled_hypervisors[0]
11583

    
11584
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11585
                                        hypervisor_name)
11586
    node_iinfo = \
11587
      self.rpc.call_all_instances_info(node_list,
11588
                                       cluster_info.enabled_hypervisors)
11589

    
11590
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11591

    
11592
    config_ndata = self._ComputeBasicNodeData(ninfo)
11593
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11594
                                                 i_list, config_ndata)
11595
    assert len(data["nodes"]) == len(ninfo), \
11596
        "Incomplete node data computed"
11597

    
11598
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11599

    
11600
    self.in_data = data
11601

    
11602
  @staticmethod
11603
  def _ComputeNodeGroupData(cfg):
11604
    """Compute node groups data.
11605

11606
    """
11607
    ng = {}
11608
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11609
      ng[guuid] = {
11610
        "name": gdata.name,
11611
        "alloc_policy": gdata.alloc_policy,
11612
        }
11613
    return ng
11614

    
11615
  @staticmethod
11616
  def _ComputeBasicNodeData(node_cfg):
11617
    """Compute global node data.
11618

11619
    @rtype: dict
11620
    @returns: a dict mapping node names to dicts of config-based attributes
11621

11622
    """
11623
    node_results = {}
11624
    for ninfo in node_cfg.values():
11625
      # fill in static (config-based) values
11626
      pnr = {
11627
        "tags": list(ninfo.GetTags()),
11628
        "primary_ip": ninfo.primary_ip,
11629
        "secondary_ip": ninfo.secondary_ip,
11630
        "offline": ninfo.offline,
11631
        "drained": ninfo.drained,
11632
        "master_candidate": ninfo.master_candidate,
11633
        "group": ninfo.group,
11634
        "master_capable": ninfo.master_capable,
11635
        "vm_capable": ninfo.vm_capable,
11636
        }
11637

    
11638
      node_results[ninfo.name] = pnr
11639

    
11640
    return node_results
11641

    
11642
  @staticmethod
11643
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11644
                              node_results):
11645
    """Compute global node data.
11646

11647
    @param node_results: the basic node structures as filled from the config
11648

11649
    """
11650
    # make a copy of the current dict
11651
    node_results = dict(node_results)
11652
    for nname, nresult in node_data.items():
11653
      assert nname in node_results, "Missing basic data for node %s" % nname
11654
      ninfo = node_cfg[nname]
11655

    
11656
      if not (ninfo.offline or ninfo.drained):
11657
        nresult.Raise("Can't get data for node %s" % nname)
11658
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11659
                                nname)
11660
        remote_info = nresult.payload
11661

    
11662
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
11663
                     'vg_size', 'vg_free', 'cpu_total']:
11664
          if attr not in remote_info:
11665
            raise errors.OpExecError("Node '%s' didn't return attribute"
11666
                                     " '%s'" % (nname, attr))
11667
          if not isinstance(remote_info[attr], int):
11668
            raise errors.OpExecError("Node '%s' returned invalid value"
11669
                                     " for '%s': %s" %
11670
                                     (nname, attr, remote_info[attr]))
11671
        # compute memory used by primary instances
11672
        i_p_mem = i_p_up_mem = 0
11673
        for iinfo, beinfo in i_list:
11674
          if iinfo.primary_node == nname:
11675
            i_p_mem += beinfo[constants.BE_MEMORY]
11676
            if iinfo.name not in node_iinfo[nname].payload:
11677
              i_used_mem = 0
11678
            else:
11679
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11680
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11681
            remote_info['memory_free'] -= max(0, i_mem_diff)
11682

    
11683
            if iinfo.admin_up:
11684
              i_p_up_mem += beinfo[constants.BE_MEMORY]
11685

    
11686
        # compute memory used by instances
11687
        pnr_dyn = {
11688
          "total_memory": remote_info['memory_total'],
11689
          "reserved_memory": remote_info['memory_dom0'],
11690
          "free_memory": remote_info['memory_free'],
11691
          "total_disk": remote_info['vg_size'],
11692
          "free_disk": remote_info['vg_free'],
11693
          "total_cpus": remote_info['cpu_total'],
11694
          "i_pri_memory": i_p_mem,
11695
          "i_pri_up_memory": i_p_up_mem,
11696
          }
11697
        pnr_dyn.update(node_results[nname])
11698
        node_results[nname] = pnr_dyn
11699

    
11700
    return node_results
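  # Worked example (made-up numbers) for the free-memory correction above: if
  # an instance has BE_MEMORY = 1024 MB configured but the hypervisor reports
  # only 768 MB actually in use, the node currently looks 256 MB "freer" than
  # it can be relied upon to stay, so 'memory_free' is reduced by
  # max(0, 1024 - 768) = 256 MB.  If the instance uses more than configured,
  # nothing is subtracted.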
11701

    
11702
  @staticmethod
11703
  def _ComputeInstanceData(cluster_info, i_list):
11704
    """Compute global instance data.
11705

11706
    """
11707
    instance_data = {}
11708
    for iinfo, beinfo in i_list:
11709
      nic_data = []
11710
      for nic in iinfo.nics:
11711
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11712
        nic_dict = {"mac": nic.mac,
11713
                    "ip": nic.ip,
11714
                    "mode": filled_params[constants.NIC_MODE],
11715
                    "link": filled_params[constants.NIC_LINK],
11716
                   }
11717
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11718
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11719
        nic_data.append(nic_dict)
11720
      pir = {
11721
        "tags": list(iinfo.GetTags()),
11722
        "admin_up": iinfo.admin_up,
11723
        "vcpus": beinfo[constants.BE_VCPUS],
11724
        "memory": beinfo[constants.BE_MEMORY],
11725
        "os": iinfo.os,
11726
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11727
        "nics": nic_data,
11728
        "disks": [{constants.IDISK_SIZE: dsk.size,
11729
                   constants.IDISK_MODE: dsk.mode}
11730
                  for dsk in iinfo.disks],
11731
        "disk_template": iinfo.disk_template,
11732
        "hypervisor": iinfo.hypervisor,
11733
        }
11734
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11735
                                                 pir["disks"])
11736
      instance_data[iinfo.name] = pir
11737

    
11738
    return instance_data
11739

    
11740
  def _AddNewInstance(self):
11741
    """Add new instance data to allocator structure.
11742

11743
    This, in combination with _ComputeClusterData, will create the
11744
    correct structure needed as input for the allocator.
11745

11746
    The checks for the completeness of the opcode must have already been
11747
    done.
11748

11749
    """
11750
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11751

    
11752
    if self.disk_template in constants.DTS_INT_MIRROR:
11753
      self.required_nodes = 2
11754
    else:
11755
      self.required_nodes = 1
11756
    request = {
11757
      "name": self.name,
11758
      "disk_template": self.disk_template,
11759
      "tags": self.tags,
11760
      "os": self.os,
11761
      "vcpus": self.vcpus,
11762
      "memory": self.mem_size,
11763
      "disks": self.disks,
11764
      "disk_space_total": disk_space,
11765
      "nics": self.nics,
11766
      "required_nodes": self.required_nodes,
11767
      }
11768
    return request
11769

    
11770
  def _AddRelocateInstance(self):
11771
    """Add relocate instance data to allocator structure.
11772

11773
    This, in combination with _ComputeClusterData, will create the
11774
    correct structure needed as input for the allocator.
11775

11776
    The checks for the completeness of the opcode must have already been
11777
    done.
11778

11779
    """
11780
    instance = self.cfg.GetInstanceInfo(self.name)
11781
    if instance is None:
11782
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
11783
                                   " IAllocator" % self.name)
11784

    
11785
    if instance.disk_template not in constants.DTS_MIRRORED:
11786
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11787
                                 errors.ECODE_INVAL)
11788

    
11789
    if instance.disk_template in constants.DTS_INT_MIRROR and \
11790
        len(instance.secondary_nodes) != 1:
11791
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
11792
                                 errors.ECODE_STATE)
11793

    
11794
    self.required_nodes = 1
11795
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11796
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11797

    
11798
    request = {
11799
      "name": self.name,
11800
      "disk_space_total": disk_space,
11801
      "required_nodes": self.required_nodes,
11802
      "relocate_from": self.relocate_from,
11803
      }
11804
    return request
11805

    
11806
  def _AddEvacuateNodes(self):
11807
    """Add evacuate nodes data to allocator structure.
11808

11809
    """
11810
    request = {
11811
      "evac_nodes": self.evac_nodes
11812
      }
11813
    return request
11814

    
11815
  def _BuildInputData(self, fn):
11816
    """Build input data structures.
11817

11818
    """
11819
    self._ComputeClusterData()
11820

    
11821
    request = fn()
11822
    request["type"] = self.mode
11823
    self.in_data["request"] = request
11824

    
11825
    self.in_text = serializer.Dump(self.in_data)
11826

    
11827
  def Run(self, name, validate=True, call_fn=None):
11828
    """Run an instance allocator and return the results.
11829

11830
    """
11831
    if call_fn is None:
11832
      call_fn = self.rpc.call_iallocator_runner
11833

    
11834
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11835
    result.Raise("Failure while running the iallocator script")
11836

    
11837
    self.out_text = result.payload
11838
    if validate:
11839
      self._ValidateResult()
11840

    
11841
  def _ValidateResult(self):
11842
    """Process the allocator results.
11843

11844
    This will process and if successful save the result in
11845
    self.out_data and the other parameters.
11846

11847
    """
11848
    try:
11849
      rdict = serializer.Load(self.out_text)
11850
    except Exception, err:
11851
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11852

    
11853
    if not isinstance(rdict, dict):
11854
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
11855

    
11856
    # TODO: remove backwards compatibility in later versions
11857
    if "nodes" in rdict and "result" not in rdict:
11858
      rdict["result"] = rdict["nodes"]
11859
      del rdict["nodes"]
11860

    
11861
    for key in "success", "info", "result":
11862
      if key not in rdict:
11863
        raise errors.OpExecError("Can't parse iallocator results:"
11864
                                 " missing key '%s'" % key)
11865
      setattr(self, key, rdict[key])
11866

    
11867
    if not isinstance(rdict["result"], list):
11868
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11869
                               " is not a list")
11870
    self.out_data = rdict
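# Illustrative sketch only (made-up payload): a minimal allocator reply that
# _ValidateResult above accepts -- a serialized dict with "success", "info"
# and "result" keys, where "result" must be a list (for an allocation, the
# names of the chosen nodes).
def _ExampleIAllocatorReply():
  reply = {
    "success": True,
    "info": "allocation successful",
    "result": ["node1.example.com", "node2.example.com"],
    }
  return serializer.Dump(reply)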
11871

    
11872

    
11873
class LUTestAllocator(NoHooksLU):
11874
  """Run allocator tests.
11875

11876
  This LU runs the allocator tests
11877

11878
  """
11879
  def CheckPrereq(self):
11880
    """Check prerequisites.
11881

11882
    This checks the opcode parameters depending on the test direction and mode.
11883

11884
    """
11885
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11886
      for attr in ["mem_size", "disks", "disk_template",
11887
                   "os", "tags", "nics", "vcpus"]:
11888
        if not hasattr(self.op, attr):
11889
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11890
                                     attr, errors.ECODE_INVAL)
11891
      iname = self.cfg.ExpandInstanceName(self.op.name)
11892
      if iname is not None:
11893
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11894
                                   iname, errors.ECODE_EXISTS)
11895
      if not isinstance(self.op.nics, list):
11896
        raise errors.OpPrereqError("Invalid parameter 'nics'",
11897
                                   errors.ECODE_INVAL)
11898
      if not isinstance(self.op.disks, list):
11899
        raise errors.OpPrereqError("Invalid parameter 'disks'",
11900
                                   errors.ECODE_INVAL)
11901
      for row in self.op.disks:
11902
        if (not isinstance(row, dict) or
11903
            "size" not in row or
11904
            not isinstance(row["size"], int) or
11905
            "mode" not in row or
11906
            row["mode"] not in ['r', 'w']):
11907
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
11908
                                     " parameter", errors.ECODE_INVAL)
11909
      if self.op.hypervisor is None:
11910
        self.op.hypervisor = self.cfg.GetHypervisorType()
11911
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11912
      fname = _ExpandInstanceName(self.cfg, self.op.name)
11913
      self.op.name = fname
11914
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11915
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11916
      if not hasattr(self.op, "evac_nodes"):
11917
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11918
                                   " opcode input", errors.ECODE_INVAL)
11919
    else:
11920
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11921
                                 self.op.mode, errors.ECODE_INVAL)
11922

    
11923
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11924
      if self.op.allocator is None:
11925
        raise errors.OpPrereqError("Missing allocator name",
11926
                                   errors.ECODE_INVAL)
11927
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11928
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
11929
                                 self.op.direction, errors.ECODE_INVAL)
11930

    
11931
  def Exec(self, feedback_fn):
11932
    """Run the allocator test.
11933

11934
    """
11935
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11936
      ial = IAllocator(self.cfg, self.rpc,
11937
                       mode=self.op.mode,
11938
                       name=self.op.name,
11939
                       mem_size=self.op.mem_size,
11940
                       disks=self.op.disks,
11941
                       disk_template=self.op.disk_template,
11942
                       os=self.op.os,
11943
                       tags=self.op.tags,
11944
                       nics=self.op.nics,
11945
                       vcpus=self.op.vcpus,
11946
                       hypervisor=self.op.hypervisor,
11947
                       )
11948
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11949
      ial = IAllocator(self.cfg, self.rpc,
11950
                       mode=self.op.mode,
11951
                       name=self.op.name,
11952
                       relocate_from=list(self.relocate_from),
11953
                       )
11954
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11955
      ial = IAllocator(self.cfg, self.rpc,
11956
                       mode=self.op.mode,
11957
                       evac_nodes=self.op.evac_nodes)
11958
    else:
11959
      raise errors.ProgrammerError("Uncatched mode %s in"
11960
                                   " LUTestAllocator.Exec", self.op.mode)
11961

    
11962
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
11963
      result = ial.in_text
11964
    else:
11965
      ial.Run(self.op.allocator, validate=False)
11966
      result = ial.out_text
11967
    return result
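# Illustrative sketch only (abridged, values made up): the rough shape of the
# text document handed to the external allocator script, i.e. what
# LUTestAllocator.Exec above returns for IALLOCATOR_DIR_IN.  The real document
# also carries the full per-node and per-instance data built by
# _ComputeClusterData; constant values such as HT_XEN_PVM are only examples.
def _ExampleIAllocatorRequestDoc():
  doc = {
    "version": constants.IALLOCATOR_VERSION,
    "cluster_name": "cluster.example.com",
    "cluster_tags": [],
    "enabled_hypervisors": [constants.HT_XEN_PVM],
    "nodegroups": {
      "uuid-a": {"name": "default",
                 "alloc_policy": constants.ALLOC_POLICY_PREFERRED},
      },
    "nodes": {},      # abridged, see _ComputeBasicNodeData
    "instances": {},  # abridged, see _ComputeInstanceData
    "request": {
      "type": constants.IALLOCATOR_MODE_ALLOC,
      "name": "inst1.example.com",
      "disk_template": constants.DT_DRBD8,
      "disks": [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "w"}],
      "disk_space_total": 1152,
      "nics": [],
      "memory": 512,
      "vcpus": 1,
      "os": "debian-image",
      "tags": [],
      "required_nodes": 2,
      },
    }
  return serializer.Dump(doc)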
11968

    
11969

    
11970
#: Query type implementations
11971
_QUERY_IMPL = {
11972
  constants.QR_INSTANCE: _InstanceQuery,
11973
  constants.QR_NODE: _NodeQuery,
11974
  constants.QR_GROUP: _GroupQuery,
11975
  constants.QR_OS: _OsQuery,
11976
  }
11977

    
11978
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
11979

    
11980

    
11981
def _GetQueryImplementation(name):
11982
  """Returns the implemtnation for a query type.
11983

11984
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
11985

11986
  """
11987
  try:
11988
    return _QUERY_IMPL[name]
11989
  except KeyError:
11990
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11991
                               errors.ECODE_INVAL)
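# Illustrative sketch only: how the dispatch above is typically used -- look
# up the implementation class for a query resource and instantiate it with
# the same constructor arguments that LUGroupQuery.CheckArguments uses.
def _ExampleQueryDispatch():
  impl_cls = _GetQueryImplementation(constants.QR_GROUP)  # -> _GroupQuery
  return impl_cls(qlang.MakeSimpleFilter("name", None), ["name"], False)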