#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
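  # Example (illustrative sketch, not taken from a specific LU): an Exec that
  # wants two follow-up jobs submitted on its behalf could end with
  #   return ResultWithJobs([[op1], [op2a, op2b]], some_key="some value")
  # where each inner list becomes one job and "some_key" is returned in the
  # opcode result next to the submitted job IDs.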


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. If there are no nodes, an empty
      list should be returned (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
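  # Illustrative usage sketch: an LU relying on this helper typically sets, in
  # ExpandNames:
  #   self.needed_locks[locking.LEVEL_NODE] = []
  #   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  # and then, in DeclareLocks:
  #   if level == locking.LEVEL_NODE:
  #     self._LockInstancesNodes()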


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
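# Example (illustrative): _GetUpdatedParams({"a": 1, "b": 2},
# {"a": constants.VALUE_DEFAULT, "c": 3}) returns {"b": 2, "c": 3} -- "a" is
# dropped so that it reverts to its default, "b" is kept and "c" is added.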


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
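# Example (illustrative): for an instance with one bridged NIC and one disk,
# the returned environment contains, among others, INSTANCE_NIC_COUNT=1,
# INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK and
# INSTANCE_NIC0_BRIDGE, plus INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE and
# INSTANCE_DISK0_MODE; the hooks runner later prefixes every key with GANETI_.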


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
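# Example (illustrative): with candidate_pool_size = 10 and currently
# mc_now = 3, mc_should = 3, the new node raises the target to
# min(3 + 1, 10) = 4, so 3 < 4 and the node promotes itself.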


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of the instance's disks reported faulty on a node.

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
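# Example (illustrative): a still-valid certificate yields (None, None); one
# close to the expiration warning threshold yields
# (LUClusterVerify.ETYPE_WARNING, "While verifying <filename>: <msg>"); an
# unreadable file yields LUClusterVerify.ETYPE_ERROR with the load error.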


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
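  # Example (illustrative): _Error(self.ENODESSH, "node1.example.com",
  # "bad key") is reported as "  - ERROR: node node1.example.com: bad key",
  # or, when the opcode requests error_codes, as
  # "  - ERROR:ENODESSH:node:node1.example.com:bad key".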

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
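  # Example (illustrative): assuming constants.NODE_MAX_CLOCK_SKEW allows a
  # 150s drift, a node whose reported time is 200s behind nvinfo_starttime
  # is flagged with ENODETIME and a diff of roughly "200.0s".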

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
1535

    
1536
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

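  # Illustrative shape of the "diskstatus" argument consumed above (node name
  # and payloads are example values): one (success, payload) pair per disk
  # index for every node that reported on this instance, e.g.
  #   {"node1.example.com": [(True, <blockdev status>), (False, "rpc error")]}
  # which diskdata flattens into (node, success, payload, disk_index) tuples.
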
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

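  # Worked example for the check above (all numbers illustrative): if this
  # node is the secondary for two auto-balanced instances whose primary is
  # node "alpha", with BE_MEMORY of 2048 and 3072 MiB, then needed_mem is
  # 5120 MiB; a node reporting mfree of 4096 MiB would therefore trigger the
  # N+1 warning for a failure of "alpha".
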
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no nodes (not"
                " found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

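  # Illustrative shape of the "fileinfo" structure built above (paths and
  # checksums are example values only): a per-file map of checksum to the set
  # of nodes reporting that checksum, e.g.
  #   {"/etc/example.conf": {"abc123...": set(["node1", "node2"]),
  #                          "def456...": set(["node3"])}}
  # Two keys under one file means the file exists in different versions.
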
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

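  # Illustrative shape of the "node_drbd" map built above (minor numbers and
  # names are example values): DRBD minor -> (instance name, whether the
  # minor must be active because the instance is marked up), e.g.
  #   {0: ("instance1.example.com", True), 1: ("instance2.example.com", False)}
  # It is then compared against the minors the node actually reports in use.
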
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

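  # Illustrative shape of nimg.oslist as built above (names, paths and
  # version numbers are example values): OS name -> list of
  # (path, status, diagnose, variants, parameters, api_versions) entries, e.g.
  #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
  #                     set(["default"]), set(), set([20]))]}
  # More than one entry for a name means the OS was found in several search
  # paths on the node.
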
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

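  # Illustrative example of the return value of _CollectDiskInfo (all names
  # are example values): per instance, per node, one (success, payload) pair
  # per disk in configuration order, e.g.
  #   {"instance1.example.com": {"node1.example.com": [(True, status0),
  #                                                    (True, status1)]},
  #    "diskless1.example.com": {}}
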
  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

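  # Illustrative shape of the "hvp_data" entries checked above (hypervisor
  # and OS names are example values): (source, hypervisor, filled parameters),
  # where source describes where the parameters come from, e.g.
  #   ("cluster", "xen-pvm", {...}), ("os debian-image", "xen-pvm", {...}),
  #   ("instance instance1.example.com", "kvm", {...})
  # as assembled in Exec() below.
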
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    cfg = self.cfg

    env = {
      "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in cfg.GetAllNodesInfo().values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.cfg.GetNodeList())

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    # Compute the set of hypervisor parameters
    hvp_data = []
    for hv_name in hypervisors:
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
    for os_name, os_hvp in cluster.os_hvp.items():
      for hv_name, hv_params in os_hvp.items():
        if not hv_params:
          continue
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
    # TODO: collapse identical parameter values in a single one
    for instance in instanceinfo.values():
      if not instance.hvparams:
        continue
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
    # and verify them locally
    self._VerifyHVP(hvp_data)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2248
    node_verify_param = {
2249
      constants.NV_FILELIST:
2250
        utils.UniqueSequence(filename
2251
                             for files in filemap
2252
                             for filename in files),
2253
      constants.NV_NODELIST: [node.name for node in nodeinfo
2254
                              if not node.offline],
2255
      constants.NV_HYPERVISOR: hypervisors,
2256
      constants.NV_HVPARAMS: hvp_data,
2257
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2258
                                  node.secondary_ip) for node in nodeinfo
2259
                                 if not node.offline],
2260
      constants.NV_INSTANCELIST: hypervisors,
2261
      constants.NV_VERSION: None,
2262
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2263
      constants.NV_NODESETUP: None,
2264
      constants.NV_TIME: None,
2265
      constants.NV_MASTERIP: (master_node, master_ip),
2266
      constants.NV_OSLIST: None,
2267
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2268
      }
2269

    
2270
    if vg_name is not None:
2271
      node_verify_param[constants.NV_VGLIST] = None
2272
      node_verify_param[constants.NV_LVLIST] = vg_name
2273
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2274
      node_verify_param[constants.NV_DRBDLIST] = None
2275

    
2276
    if drbd_helper:
2277
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2278

    
2279
    # Build our expected cluster state
2280
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2281
                                                 name=node.name,
2282
                                                 vm_capable=node.vm_capable))
2283
                      for node in nodeinfo)
2284

    
2285
    # Gather OOB paths
2286
    oob_paths = []
2287
    for node in nodeinfo:
2288
      path = _SupportsOob(self.cfg, node)
2289
      if path and path not in oob_paths:
2290
        oob_paths.append(path)
2291

    
2292
    if oob_paths:
2293
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2294

    
2295
    for instance in instancelist:
2296
      inst_config = instanceinfo[instance]
2297

    
2298
      for nname in inst_config.all_nodes:
2299
        if nname not in node_image:
2300
          # ghost node
2301
          gnode = self.NodeImage(name=nname)
2302
          gnode.ghost = True
2303
          node_image[nname] = gnode
2304

    
2305
      inst_config.MapLVsByNode(node_vol_should)
2306

    
2307
      pnode = inst_config.primary_node
2308
      node_image[pnode].pinst.append(instance)
2309

    
2310
      for snode in inst_config.secondary_nodes:
2311
        nimg = node_image[snode]
2312
        nimg.sinst.append(instance)
2313
        if pnode not in nimg.sbp:
2314
          nimg.sbp[pnode] = []
2315
        nimg.sbp[pnode].append(instance)
2316

    
2317
    # At this point, we have the in-memory data structures complete,
2318
    # except for the runtime information, which we'll gather next
2319

    
2320
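    # Illustrative example of the expected-state map built above (names are
    # example values): for a node acting as secondary, nimg.sbp maps each
    # primary node to the instances that would fail over here, e.g.
    #   node_image["node2"].sbp == {"node1": ["instance1", "instance2"]}
    # while pinst/sinst list the instances with this node as primary/secondary.
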
    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying configuration file consistency")
    self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
2391
    for instance in instancelist:
2392
      if verbose:
2393
        feedback_fn("* Verifying instance %s" % instance)
2394
      inst_config = instanceinfo[instance]
2395
      self._VerifyInstance(instance, inst_config, node_image,
2396
                           instdisk[instance])
2397
      inst_nodes_offline = []
2398

    
2399
      pnode = inst_config.primary_node
2400
      pnode_img = node_image[pnode]
2401
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2402
               self.ENODERPC, pnode, "instance %s, connection to"
2403
               " primary node failed", instance)
2404

    
2405
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2406
               self.EINSTANCEBADNODE, instance,
2407
               "instance is marked as running and lives on offline node %s",
2408
               inst_config.primary_node)
2409

    
2410
      # If the instance is non-redundant we cannot survive losing its primary
2411
      # node, so we are not N+1 compliant. On the other hand we have no disk
2412
      # templates with more than one secondary so that situation is not well
2413
      # supported either.
2414
      # FIXME: does not support file-backed instances
2415
      if not inst_config.secondary_nodes:
2416
        i_non_redundant.append(instance)
2417

    
2418
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2419
               instance, "instance has multiple secondary nodes: %s",
2420
               utils.CommaJoin(inst_config.secondary_nodes),
2421
               code=self.ETYPE_WARNING)
2422

    
2423
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2424
        pnode = inst_config.primary_node
2425
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2426
        instance_groups = {}
2427

    
2428
        for node in instance_nodes:
2429
          instance_groups.setdefault(nodeinfo_byname[node].group,
2430
                                     []).append(node)
2431

    
2432
        pretty_list = [
2433
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2434
          # Sort so that we always list the primary node first.
2435
          for group, nodes in sorted(instance_groups.items(),
2436
                                     key=lambda (_, nodes): pnode in nodes,
2437
                                     reverse=True)]
2438

    
2439
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2440
                      instance, "instance has primary and secondary nodes in"
2441
                      " different groups: %s", utils.CommaJoin(pretty_list),
2442
                      code=self.ETYPE_WARNING)
2443

    
2444
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2445
        i_non_a_balanced.append(instance)
2446

    
2447
      for snode in inst_config.secondary_nodes:
2448
        s_img = node_image[snode]
2449
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2450
                 "instance %s, connection to secondary node failed", instance)
2451

    
2452
        if s_img.offline:
2453
          inst_nodes_offline.append(snode)
2454

    
2455
      # warn that the instance lives on offline nodes
2456
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2457
               "instance has offline secondary node(s) %s",
2458
               utils.CommaJoin(inst_nodes_offline))
2459
      # ... or ghost/non-vm_capable nodes
2460
      for node in inst_config.all_nodes:
2461
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2462
                 "instance lives on ghost node %s", node)
2463
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2464
                 instance, "instance lives on non-vm_capable node %s", node)
2465

    
2466
    feedback_fn("* Verifying orphan volumes")
2467
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2468
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2469

    
2470
    feedback_fn("* Verifying orphan instances")
2471
    self._VerifyOrphanInstances(instancelist, node_image)
2472

    
2473
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2474
      feedback_fn("* Verifying N+1 Memory redundancy")
2475
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2476

    
2477
    feedback_fn("* Other Notes")
2478
    if i_non_redundant:
2479
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2480
                  % len(i_non_redundant))
2481

    
2482
    if i_non_a_balanced:
2483
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2484
                  % len(i_non_a_balanced))
2485

    
2486
    if n_offline:
2487
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2488

    
2489
    if n_drained:
2490
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2491

    
2492
    return not self.bad
2493

    
2494
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    instances = self.cfg.GetAllInstancesInfo().values()

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if not inst.admin_up:
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, [])
    for node, node_res in node_lvs.items():
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

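  # Illustrative example for _EnsureChildSizes above (sizes in MiB, values
  # made up): a DRBD8 disk of size 10240 whose first child (the data volume)
  # is recorded as 10239 gets the child grown to 10240 and the method returns
  # True; the second child (the metadata volume) is intentionally left alone.
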
  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
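        # The node reports the device size in bytes; shifting right by 20
        # bits divides by 2**20 and yields MiB, which is the unit used for
        # disk.size in the configuration (e.g. 10737418240 bytes >> 20 ==
        # 10240 MiB).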
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters.

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2944
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2945
                              (instance.name, nic_idx))
2946
      if nic_errors:
2947
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2948
                                   "\n".join(nic_errors))
2949

    
2950
    # hypervisor list/parameters
2951
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2952
    if self.op.hvparams:
2953
      for hv_name, hv_dict in self.op.hvparams.items():
2954
        if hv_name not in self.new_hvparams:
2955
          self.new_hvparams[hv_name] = hv_dict
2956
        else:
2957
          self.new_hvparams[hv_name].update(hv_dict)
2958

    
2959
    # os hypervisor parameters
2960
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2961
    if self.op.os_hvp:
2962
      for os_name, hvs in self.op.os_hvp.items():
2963
        if os_name not in self.new_os_hvp:
2964
          self.new_os_hvp[os_name] = hvs
2965
        else:
2966
          for hv_name, hv_dict in hvs.items():
2967
            if hv_name not in self.new_os_hvp[os_name]:
2968
              self.new_os_hvp[os_name][hv_name] = hv_dict
2969
            else:
2970
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2971

    
2972
    # os parameters
2973
    self.new_osp = objects.FillDict(cluster.osparams, {})
2974
    if self.op.osparams:
2975
      for os_name, osp in self.op.osparams.items():
2976
        if os_name not in self.new_osp:
2977
          self.new_osp[os_name] = {}
2978

    
2979
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2980
                                                  use_none=True)
2981

    
2982
        if not self.new_osp[os_name]:
2983
          # we removed all parameters
2984
          del self.new_osp[os_name]
2985
        else:
2986
          # check the parameter validity (remote check)
2987
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2988
                         os_name, self.new_osp[os_name])
2989

    
2990
    # changes to the hypervisor list
2991
    if self.op.enabled_hypervisors is not None:
2992
      self.hv_list = self.op.enabled_hypervisors
2993
      for hv in self.hv_list:
2994
        # if the hypervisor doesn't already exist in the cluster
2995
        # hvparams, we initialize it to empty, and then (in both
2996
        # cases) we make sure to fill the defaults, as we might not
2997
        # have a complete defaults list if the hypervisor wasn't
2998
        # enabled before
2999
        if hv not in new_hvp:
3000
          new_hvp[hv] = {}
3001
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3002
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3003
    else:
3004
      self.hv_list = cluster.enabled_hypervisors
3005

    
3006
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3007
      # either the enabled list has changed, or the parameters have, validate
3008
      for hv_name, hv_params in self.new_hvparams.items():
3009
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3010
            (self.op.enabled_hypervisors and
3011
             hv_name in self.op.enabled_hypervisors)):
3012
          # either this is a new hypervisor, or its parameters have changed
3013
          hv_class = hypervisor.GetHypervisor(hv_name)
3014
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3015
          hv_class.CheckParameterSyntax(hv_params)
3016
          _CheckHVParams(self, node_list, hv_name, hv_params)
3017

    
3018
    if self.op.os_hvp:
3019
      # no need to check any newly-enabled hypervisors, since the
3020
      # defaults have already been checked in the above code-block
3021
      for os_name, os_hvp in self.new_os_hvp.items():
3022
        for hv_name, hv_params in os_hvp.items():
3023
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3024
          # we need to fill in the new os_hvp on top of the actual hv_p
3025
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3026
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3027
          hv_class = hypervisor.GetHypervisor(hv_name)
3028
          hv_class.CheckParameterSyntax(new_osp)
3029
          _CheckHVParams(self, node_list, hv_name, new_osp)
3030

    
3031
    if self.op.default_iallocator:
3032
      alloc_script = utils.FindFile(self.op.default_iallocator,
3033
                                    constants.IALLOCATOR_SEARCH_PATH,
3034
                                    os.path.isfile)
3035
      if alloc_script is None:
3036
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3037
                                   " specified" % self.op.default_iallocator,
3038
                                   errors.ECODE_INVAL)
3039

    
3040
  def Exec(self, feedback_fn):
3041
    """Change the parameters of the cluster.
3042

3043
    """
3044
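    # an empty string for vg_name/drbd_helper in the opcode means "unset";
    # normalize it to None before comparing with the stored configuration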
    if self.op.vg_name is not None:
3045
      new_volume = self.op.vg_name
3046
      if not new_volume:
3047
        new_volume = None
3048
      if new_volume != self.cfg.GetVGName():
3049
        self.cfg.SetVGName(new_volume)
3050
      else:
3051
        feedback_fn("Cluster LVM configuration already in desired"
3052
                    " state, not changing")
3053
    if self.op.drbd_helper is not None:
3054
      new_helper = self.op.drbd_helper
3055
      if not new_helper:
3056
        new_helper = None
3057
      if new_helper != self.cfg.GetDRBDHelper():
3058
        self.cfg.SetDRBDHelper(new_helper)
3059
      else:
3060
        feedback_fn("Cluster DRBD helper already in desired state,"
3061
                    " not changing")
3062
    if self.op.hvparams:
3063
      self.cluster.hvparams = self.new_hvparams
3064
    if self.op.os_hvp:
3065
      self.cluster.os_hvp = self.new_os_hvp
3066
    if self.op.enabled_hypervisors is not None:
3067
      self.cluster.hvparams = self.new_hvparams
3068
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3069
    if self.op.beparams:
3070
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3071
    if self.op.nicparams:
3072
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3073
    if self.op.osparams:
3074
      self.cluster.osparams = self.new_osp
3075
    if self.op.ndparams:
3076
      self.cluster.ndparams = self.new_ndparams
3077

    
3078
    if self.op.candidate_pool_size is not None:
3079
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3080
      # we need to update the pool size here, otherwise the save will fail
3081
      _AdjustCandidatePool(self, [])
3082

    
3083
    if self.op.maintain_node_health is not None:
3084
      self.cluster.maintain_node_health = self.op.maintain_node_health
3085

    
3086
    if self.op.prealloc_wipe_disks is not None:
3087
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3088

    
3089
    if self.op.add_uids is not None:
3090
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3091

    
3092
    if self.op.remove_uids is not None:
3093
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3094

    
3095
    if self.op.uid_pool is not None:
3096
      self.cluster.uid_pool = self.op.uid_pool
3097

    
3098
    if self.op.default_iallocator is not None:
3099
      self.cluster.default_iallocator = self.op.default_iallocator
3100

    
3101
    if self.op.reserved_lvs is not None:
3102
      self.cluster.reserved_lvs = self.op.reserved_lvs
3103

    
3104
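    # small helper applying a list of (DDM_ADD|DDM_REMOVE, os_name)
    # modifications to one of the cluster-level OS lists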
    def helper_os(aname, mods, desc):
3105
      desc += " OS list"
3106
      lst = getattr(self.cluster, aname)
3107
      for key, val in mods:
3108
        if key == constants.DDM_ADD:
3109
          if val in lst:
3110
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3111
          else:
3112
            lst.append(val)
3113
        elif key == constants.DDM_REMOVE:
3114
          if val in lst:
3115
            lst.remove(val)
3116
          else:
3117
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3118
        else:
3119
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3120

    
3121
    if self.op.hidden_os:
3122
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3123

    
3124
    if self.op.blacklisted_os:
3125
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3126

    
3127
    if self.op.master_netdev:
3128
      master = self.cfg.GetMasterNode()
3129
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3130
                  self.cluster.master_netdev)
3131
      result = self.rpc.call_node_stop_master(master, False)
3132
      result.Raise("Could not disable the master ip")
3133
      feedback_fn("Changing master_netdev from %s to %s" %
3134
                  (self.cluster.master_netdev, self.op.master_netdev))
3135
      self.cluster.master_netdev = self.op.master_netdev
3136

    
3137
    self.cfg.Update(self.cluster, feedback_fn)
3138

    
3139
    if self.op.master_netdev:
3140
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3141
                  self.op.master_netdev)
3142
      result = self.rpc.call_node_start_master(master, False, False)
3143
      if result.fail_msg:
3144
        self.LogWarning("Could not re-enable the master ip on"
3145
                        " the master, please restart manually: %s",
3146
                        result.fail_msg)
3147

    
3148

    
3149
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)

def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3208
  """Distribute additional files which are part of the cluster configuration.
3209

3210
  ConfigWriter takes care of distributing the config and ssconf files, but
3211
  there are more files which should be distributed to all nodes. This function
3212
  makes sure those are copied.
3213

3214
  @param lu: calling logical unit
3215
  @param additional_nodes: list of nodes not in the config to distribute to
3216
  @type additional_vm: boolean
3217
  @param additional_vm: whether the additional nodes are vm-capable or not
3218

3219
  """
3220
  # Gather target nodes
3221
  cluster = lu.cfg.GetClusterInfo()
3222
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3223

    
3224
  online_nodes = lu.cfg.GetOnlineNodeList()
3225
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3226

    
3227
  if additional_nodes is not None:
3228
    online_nodes.extend(additional_nodes)
3229
    if additional_vm:
3230
      vm_nodes.extend(additional_nodes)
3231

    
3232
  # Never distribute to master node
3233
  for nodelist in [online_nodes, vm_nodes]:
3234
    if master_info.name in nodelist:
3235
      nodelist.remove(master_info.name)
3236

    
3237
  # Gather file lists
3238
  (files_all, files_all_opt, files_mc, files_vm) = \
3239
    _ComputeAncillaryFiles(cluster, True)
3240

    
3241
  # Never re-distribute configuration file from here
3242
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3243
              constants.CLUSTER_CONF_FILE in files_vm)
3244
  assert not files_mc, "Master candidates not handled in this function"
3245

    
3246
  filemap = [
3247
    (online_nodes, files_all),
3248
    (online_nodes, files_all_opt),
3249
    (vm_nodes, files_vm),
3250
    ]
3251

    
3252
  # Upload the files
3253
  for (node_list, files) in filemap:
3254
    for fname in files:
3255
      _UploadHelper(lu, node_list, fname)
3256

    
3257

    
3258
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)

def _WaitForSync(lu, instance, disks=None, oneshot=False):
3281
  """Sleep and poll for an instance's disk to sync.
3282

3283
  """
3284
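  # nothing to wait for if the instance has no disks at all, or if the
  # caller explicitly passed an empty list of disks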
  if not instance.disks or disks is not None and not disks:
3285
    return True
3286

    
3287
  disks = _ExpandCheckDisks(instance, disks)
3288

    
3289
  if not oneshot:
3290
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3291

    
3292
  node = instance.primary_node
3293

    
3294
  for dev in disks:
3295
    lu.cfg.SetDiskID(dev, node)
3296

    
3297
  # TODO: Convert to utils.Retry
3298

    
3299
  retries = 0
3300
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3301
  while True:
3302
    max_time = 0
3303
    done = True
3304
    cumul_degraded = False
3305
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3306
    msg = rstats.fail_msg
3307
    if msg:
3308
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3309
      retries += 1
3310
      if retries >= 10:
3311
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3312
                                 " aborting." % node)
3313
      time.sleep(6)
3314
      continue
3315
    rstats = rstats.payload
3316
    retries = 0
3317
    for i, mstat in enumerate(rstats):
3318
      if mstat is None:
3319
        lu.LogWarning("Can't compute data for node %s/%s",
3320
                           node, disks[i].iv_name)
3321
        continue
3322

    
3323
      cumul_degraded = (cumul_degraded or
3324
                        (mstat.is_degraded and mstat.sync_percent is None))
3325
      if mstat.sync_percent is not None:
3326
        done = False
3327
        if mstat.estimated_time is not None:
3328
          rem_time = ("%s remaining (estimated)" %
3329
                      utils.FormatSeconds(mstat.estimated_time))
3330
          max_time = mstat.estimated_time
3331
        else:
3332
          rem_time = "no time estimate"
3333
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3334
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3335

    
3336
    # if we're done but degraded, let's do a few small retries, to
3337
    # make sure we see a stable and not transient situation; therefore
3338
    # we force restart of the loop
3339
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3340
      logging.info("Degraded disks found, %d retries left", degr_retries)
3341
      degr_retries -= 1
3342
      time.sleep(1)
3343
      continue
3344

    
3345
    if done or oneshot:
3346
      break
3347

    
3348
    time.sleep(min(60, max_time))
3349

    
3350
  if done:
3351
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3352
  return not cumul_degraded
3353

    
3354

    
3355
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result

class LUOobCommand(NoHooksLU):
3390
  """Logical unit for OOB handling.
3391

3392
  """
3393
  REQ_BGL = False
3394
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3395

    
3396
  def CheckPrereq(self):
3397
    """Check prerequisites.
3398

3399
    This checks:
3400
     - the node exists in the configuration
3401
     - OOB is supported
3402

3403
    Any errors are signaled by raising errors.OpPrereqError.
3404

3405
    """
3406
    self.nodes = []
3407
    self.master_node = self.cfg.GetMasterNode()
3408

    
3409
    assert self.op.power_delay >= 0.0
3410

    
3411
    if self.op.node_names:
3412
      if self.op.command in self._SKIP_MASTER:
3413
        if self.master_node in self.op.node_names:
3414
          master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3415
          master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3416

    
3417
          if master_oob_handler:
3418
            additional_text = ("Run '%s %s %s' if you want to operate on the"
3419
                               " master regardless") % (master_oob_handler,
3420
                                                        self.op.command,
3421
                                                        self.master_node)
3422
          else:
3423
            additional_text = "The master node does not support out-of-band"
3424

    
3425
          raise errors.OpPrereqError(("Operating on the master node %s is not"
3426
                                      " allowed for %s\n%s") %
3427
                                     (self.master_node, self.op.command,
3428
                                      additional_text), errors.ECODE_INVAL)
3429
    else:
3430
      self.op.node_names = self.cfg.GetNodeList()
3431
      if self.op.command in self._SKIP_MASTER:
3432
        self.op.node_names.remove(self.master_node)
3433

    
3434
    if self.op.command in self._SKIP_MASTER:
3435
      assert self.master_node not in self.op.node_names
3436

    
3437
    for node_name in self.op.node_names:
3438
      node = self.cfg.GetNodeInfo(node_name)
3439

    
3440
      if node is None:
3441
        raise errors.OpPrereqError("Node %s not found" % node_name,
3442
                                   errors.ECODE_NOENT)
3443
      else:
3444
        self.nodes.append(node)
3445

    
3446
      if (not self.op.ignore_status and
3447
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3448
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3449
                                    " not marked offline") % node_name,
3450
                                   errors.ECODE_STATE)
3451

    
3452
  def ExpandNames(self):
3453
    """Gather locks we need.
3454

3455
    """
3456
    if self.op.node_names:
3457
      self.op.node_names = [_ExpandNodeName(self.cfg, name)
3458
                            for name in self.op.node_names]
3459
      lock_names = self.op.node_names
3460
    else:
3461
      lock_names = locking.ALL_SET
3462

    
3463
    self.needed_locks = {
3464
      locking.LEVEL_NODE: lock_names,
3465
      }
3466

    
3467
  def Exec(self, feedback_fn):
3468
    """Execute OOB and return result if we expect any.
3469

3470
    """
3471
    master_node = self.master_node
3472
    ret = []
3473

    
3474
    for idx, node in enumerate(self.nodes):
3475
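      # each per-node result starts with the node name; a second
      # (status, payload) entry is appended below depending on the outcome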
      node_entry = [(constants.RS_NORMAL, node.name)]
3476
      ret.append(node_entry)
3477

    
3478
      oob_program = _SupportsOob(self.cfg, node)
3479

    
3480
      if not oob_program:
3481
        node_entry.append((constants.RS_UNAVAIL, None))
3482
        continue
3483

    
3484
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3485
                   self.op.command, oob_program, node.name)
3486
      result = self.rpc.call_run_oob(master_node, oob_program,
3487
                                     self.op.command, node.name,
3488
                                     self.op.timeout)
3489

    
3490
      if result.fail_msg:
3491
        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3492
                        node.name, result.fail_msg)
3493
        node_entry.append((constants.RS_NODATA, None))
3494
      else:
3495
        try:
3496
          self._CheckPayload(result)
3497
        except errors.OpExecError, err:
3498
          self.LogWarning("The payload returned by '%s' is not valid: %s",
3499
                          node.name, err)
3500
          node_entry.append((constants.RS_NODATA, None))
3501
        else:
3502
          if self.op.command == constants.OOB_HEALTH:
3503
            # For health we should log important events
3504
            for item, status in result.payload:
3505
              if status in [constants.OOB_STATUS_WARNING,
3506
                            constants.OOB_STATUS_CRITICAL]:
3507
                self.LogWarning("On node '%s' item '%s' has status '%s'",
3508
                                node.name, item, status)
3509

    
3510
          if self.op.command == constants.OOB_POWER_ON:
3511
            node.powered = True
3512
          elif self.op.command == constants.OOB_POWER_OFF:
3513
            node.powered = False
3514
          elif self.op.command == constants.OOB_POWER_STATUS:
3515
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3516
            if powered != node.powered:
3517
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3518
                               " match actual power state (%s)"), node.powered,
3519
                              node.name, powered)
3520

    
3521
          # For configuration changing commands we should update the node
3522
          if self.op.command in (constants.OOB_POWER_ON,
3523
                                 constants.OOB_POWER_OFF):
3524
            self.cfg.Update(node, feedback_fn)
3525

    
3526
          node_entry.append((constants.RS_NORMAL, result.payload))
3527

    
3528
          if (self.op.command == constants.OOB_POWER_ON and
3529
              idx < len(self.nodes) - 1):
3530
            time.sleep(self.op.power_delay)
3531

    
3532
    return ret
3533

    
3534
  def _CheckPayload(self, result):
3535
    """Checks if the payload is valid.
3536

3537
    @param result: RPC result
3538
    @raises errors.OpExecError: If payload is not valid
3539

3540
    """
3541
    errs = []
3542
    if self.op.command == constants.OOB_HEALTH:
3543
      if not isinstance(result.payload, list):
3544
        errs.append("command 'health' is expected to return a list but got %s" %
3545
                    type(result.payload))
3546
      else:
3547
        for item, status in result.payload:
3548
          if status not in constants.OOB_STATUSES:
3549
            errs.append("health item '%s' has invalid status '%s'" %
3550
                        (item, status))
3551

    
3552
    if self.op.command == constants.OOB_POWER_STATUS:
3553
      if not isinstance(result.payload, dict):
3554
        errs.append("power-status is expected to return a dict but got %s" %
3555
                    type(result.payload))
3556

    
3557
    if self.op.command in [
3558
        constants.OOB_POWER_ON,
3559
        constants.OOB_POWER_OFF,
3560
        constants.OOB_POWER_CYCLE,
3561
        ]:
3562
      if result.payload is not None:
3563
        errs.append("%s is expected to not return payload but got '%s'" %
3564
                    (self.op.command, result.payload))
3565

    
3566
    if errs:
3567
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3568
                               utils.CommaJoin(errs))
3569

    
3570
class _OsQuery(_QueryBase):
3571
  FIELDS = query.OS_FIELDS
3572

    
3573
  def ExpandNames(self, lu):
3574
    # Lock all nodes in shared mode
3575
    # Temporary removal of locks, should be reverted later
3576
    # TODO: reintroduce locks when they are lighter-weight
3577
    lu.needed_locks = {}
3578
    #self.share_locks[locking.LEVEL_NODE] = 1
3579
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3580

    
3581
    # The following variables interact with _QueryBase._GetNames
3582
    if self.names:
3583
      self.wanted = self.names
3584
    else:
3585
      self.wanted = locking.ALL_SET
3586

    
3587
    self.do_locking = self.use_locking
3588

    
3589
  def DeclareLocks(self, lu, level):
3590
    pass
3591

    
3592
  @staticmethod
3593
  def _DiagnoseByOS(rlist):
3594
    """Remaps a per-node return list into an a per-os per-node dictionary
3595

3596
    @param rlist: a map with node names as keys and OS objects as values
3597

3598
    @rtype: dict
3599
    @return: a dictionary with osnames as keys and as value another
3600
        map, with nodes as keys and tuples of (path, status, diagnose,
3601
        variants, parameters, api_versions) as values, eg::
3602

3603
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3604
                                     (/srv/..., False, "invalid api")],
3605
                           "node2": [(/srv/..., True, "", [], [])]}
3606
          }
3607

3608
    """
3609
    all_os = {}
3610
    # we build here the list of nodes that didn't fail the RPC (at RPC
3611
    # level), so that nodes with a non-responding node daemon don't
3612
    # make all OSes invalid
3613
    good_nodes = [node_name for node_name in rlist
3614
                  if not rlist[node_name].fail_msg]
3615
    for node_name, nr in rlist.items():
3616
      if nr.fail_msg or not nr.payload:
3617
        continue
3618
      for (name, path, status, diagnose, variants,
3619
           params, api_versions) in nr.payload:
3620
        if name not in all_os:
3621
          # build a list of nodes for this os containing empty lists
3622
          # for each node in node_list
3623
          all_os[name] = {}
3624
          for nname in good_nodes:
3625
            all_os[name][nname] = []
3626
        # convert params from [name, help] to (name, help)
3627
        params = [tuple(v) for v in params]
3628
        all_os[name][node_name].append((path, status, diagnose,
3629
                                        variants, params, api_versions))
3630
    return all_os
3631

    
3632
  def _GetQueryData(self, lu):
3633
    """Computes the list of nodes and their attributes.
3634

3635
    """
3636
    # Locking is not used
3637
    assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3638

    
3639
    valid_nodes = [node.name
3640
                   for node in lu.cfg.GetAllNodesInfo().values()
3641
                   if not node.offline and node.vm_capable]
3642
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3643
    cluster = lu.cfg.GetClusterInfo()
3644

    
3645
    data = {}
3646

    
3647
    for (os_name, os_data) in pol.items():
3648
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3649
                          hidden=(os_name in cluster.hidden_os),
3650
                          blacklisted=(os_name in cluster.blacklisted_os))
3651

    
3652
      variants = set()
3653
      parameters = set()
3654
      api_versions = set()
3655

    
3656
      for idx, osl in enumerate(os_data.values()):
3657
        info.valid = bool(info.valid and osl and osl[0][1])
3658
        if not info.valid:
3659
          break
3660

    
3661
        (node_variants, node_params, node_api) = osl[0][3:6]
3662
        if idx == 0:
3663
          # First entry
3664
          variants.update(node_variants)
3665
          parameters.update(node_params)
3666
          api_versions.update(node_api)
3667
        else:
3668
          # Filter out inconsistent values
3669
          variants.intersection_update(node_variants)
3670
          parameters.intersection_update(node_params)
3671
          api_versions.intersection_update(node_api)
3672

    
3673
      info.variants = list(variants)
3674
      info.parameters = list(parameters)
3675
      info.api_versions = list(api_versions)
3676

    
3677
      data[os_name] = info
3678

    
3679
    # Prepare data in requested order
3680
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3681
            if name in data]
3682

    
3683

    
3684
class LUOsDiagnose(NoHooksLU):
3685
  """Logical unit for OS diagnose/query.
3686

3687
  """
3688
  REQ_BGL = False
3689

    
3690
  @staticmethod
3691
  def _BuildFilter(fields, names):
3692
    """Builds a filter for querying OSes.
3693

3694
    """
3695
    name_filter = qlang.MakeSimpleFilter("name", names)
3696

    
3697
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3698
    # respective field is not requested
3699
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3700
                     for fname in ["hidden", "blacklisted"]
3701
                     if fname not in fields]
3702
    if "valid" not in fields:
3703
      status_filter.append([qlang.OP_TRUE, "valid"])
3704

    
3705
    if status_filter:
3706
      status_filter.insert(0, qlang.OP_AND)
3707
    else:
3708
      status_filter = None
3709

    
3710
    if name_filter and status_filter:
3711
      return [qlang.OP_AND, name_filter, status_filter]
3712
    elif name_filter:
3713
      return name_filter
3714
    else:
3715
      return status_filter
3716

    
3717
  def CheckArguments(self):
3718
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3719
                       self.op.output_fields, False)
3720

    
3721
  def ExpandNames(self):
3722
    self.oq.ExpandNames(self)
3723

    
3724
  def Exec(self, feedback_fn):
3725
    return self.oq.OldStyleQuery(self)
3726

    
3727

    
3728
class LUNodeRemove(LogicalUnit):
3729
  """Logical unit for removing a node.
3730

3731
  """
3732
  HPATH = "node-remove"
3733
  HTYPE = constants.HTYPE_NODE
3734

    
3735
  def BuildHooksEnv(self):
3736
    """Build hooks env.
3737

3738
    This doesn't run on the target node in the pre phase as a failed
3739
    node would then be impossible to remove.
3740

3741
    """
3742
    return {
3743
      "OP_TARGET": self.op.node_name,
3744
      "NODE_NAME": self.op.node_name,
3745
      }
3746

    
3747
  def BuildHooksNodes(self):
3748
    """Build hooks nodes.
3749

3750
    """
3751
    all_nodes = self.cfg.GetNodeList()
3752
    try:
3753
      all_nodes.remove(self.op.node_name)
3754
    except ValueError:
3755
      logging.warning("Node '%s', which is about to be removed, was not found"
3756
                      " in the list of all nodes", self.op.node_name)
3757
    return (all_nodes, all_nodes)
3758

    
3759
  def CheckPrereq(self):
3760
    """Check prerequisites.
3761

3762
    This checks:
3763
     - the node exists in the configuration
3764
     - it does not have primary or secondary instances
3765
     - it's not the master
3766

3767
    Any errors are signaled by raising errors.OpPrereqError.
3768

3769
    """
3770
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3771
    node = self.cfg.GetNodeInfo(self.op.node_name)
3772
    assert node is not None
3773

    
3774
    instance_list = self.cfg.GetInstanceList()
3775

    
3776
    masternode = self.cfg.GetMasterNode()
3777
    if node.name == masternode:
3778
      raise errors.OpPrereqError("Node is the master node,"
3779
                                 " you need to failover first.",
3780
                                 errors.ECODE_INVAL)
3781

    
3782
    for instance_name in instance_list:
3783
      instance = self.cfg.GetInstanceInfo(instance_name)
3784
      if node.name in instance.all_nodes:
3785
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3786
                                   " please remove first." % instance_name,
3787
                                   errors.ECODE_INVAL)
3788
    self.op.node_name = node.name
3789
    self.node = node
3790

    
3791
  def Exec(self, feedback_fn):
3792
    """Removes the node from the cluster.
3793

3794
    """
3795
    node = self.node
3796
    logging.info("Stopping the node daemon and removing configs from node %s",
3797
                 node.name)
3798

    
3799
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3800

    
3801
    # Promote nodes to master candidate as needed
3802
    _AdjustCandidatePool(self, exceptions=[node.name])
3803
    self.context.RemoveNode(node.name)
3804

    
3805
    # Run post hooks on the node before it's removed
3806
    _RunPostHook(self, node.name)
3807

    
3808
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3809
    msg = result.fail_msg
3810
    if msg:
3811
      self.LogWarning("Errors encountered on the remote node while leaving"
3812
                      " the cluster: %s", msg)
3813

    
3814
    # Remove node from our /etc/hosts
3815
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3816
      master_node = self.cfg.GetMasterNode()
3817
      result = self.rpc.call_etc_hosts_modify(master_node,
3818
                                              constants.ETC_HOSTS_REMOVE,
3819
                                              node.name, None)
3820
      result.Raise("Can't update hosts file with new host data")
3821
      _RedistributeAncillaryFiles(self)
3822

    
3823

    
3824
class _NodeQuery(_QueryBase):
3825
  FIELDS = query.NODE_FIELDS
3826

    
3827
  def ExpandNames(self, lu):
3828
    lu.needed_locks = {}
3829
    lu.share_locks[locking.LEVEL_NODE] = 1
3830

    
3831
    if self.names:
3832
      self.wanted = _GetWantedNodes(lu, self.names)
3833
    else:
3834
      self.wanted = locking.ALL_SET
3835

    
3836
    self.do_locking = (self.use_locking and
3837
                       query.NQ_LIVE in self.requested_data)
3838

    
3839
    if self.do_locking:
3840
      # if we don't request only static fields, we need to lock the nodes
3841
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3842

    
3843
  def DeclareLocks(self, lu, level):
3844
    pass
3845

    
3846
  def _GetQueryData(self, lu):
3847
    """Computes the list of nodes and their attributes.
3848

3849
    """
3850
    all_info = lu.cfg.GetAllNodesInfo()
3851

    
3852
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3853

    
3854
    # Gather data as requested
3855
    if query.NQ_LIVE in self.requested_data:
3856
      # filter out non-vm_capable nodes
3857
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3858

    
3859
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3860
                                        lu.cfg.GetHypervisorType())
3861
      live_data = dict((name, nresult.payload)
3862
                       for (name, nresult) in node_data.items()
3863
                       if not nresult.fail_msg and nresult.payload)
3864
    else:
3865
      live_data = None
3866

    
3867
    if query.NQ_INST in self.requested_data:
3868
      node_to_primary = dict([(name, set()) for name in nodenames])
3869
      node_to_secondary = dict([(name, set()) for name in nodenames])
3870

    
3871
      inst_data = lu.cfg.GetAllInstancesInfo()
3872

    
3873
      for inst in inst_data.values():
3874
        if inst.primary_node in node_to_primary:
3875
          node_to_primary[inst.primary_node].add(inst.name)
3876
        for secnode in inst.secondary_nodes:
3877
          if secnode in node_to_secondary:
3878
            node_to_secondary[secnode].add(inst.name)
3879
    else:
3880
      node_to_primary = None
3881
      node_to_secondary = None
3882

    
3883
    if query.NQ_OOB in self.requested_data:
3884
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3885
                         for name, node in all_info.iteritems())
3886
    else:
3887
      oob_support = None
3888

    
3889
    if query.NQ_GROUP in self.requested_data:
3890
      groups = lu.cfg.GetAllNodeGroupsInfo()
3891
    else:
3892
      groups = {}
3893

    
3894
    return query.NodeQueryData([all_info[name] for name in nodenames],
3895
                               live_data, lu.cfg.GetMasterNode(),
3896
                               node_to_primary, node_to_secondary, groups,
3897
                               oob_support, lu.cfg.GetClusterInfo())
3898

    
3899

    
3900
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)

class LUNodeQueryvols(NoHooksLU):
3919
  """Logical unit for getting volumes on node(s).
3920

3921
  """
3922
  REQ_BGL = False
3923
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3924
  _FIELDS_STATIC = utils.FieldSet("node")
3925

    
3926
  def CheckArguments(self):
3927
    _CheckOutputFields(static=self._FIELDS_STATIC,
3928
                       dynamic=self._FIELDS_DYNAMIC,
3929
                       selected=self.op.output_fields)
3930

    
3931
  def ExpandNames(self):
3932
    self.needed_locks = {}
3933
    self.share_locks[locking.LEVEL_NODE] = 1
3934
    if not self.op.nodes:
3935
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3936
    else:
3937
      self.needed_locks[locking.LEVEL_NODE] = \
3938
        _GetWantedNodes(self, self.op.nodes)
3939

    
3940
  def Exec(self, feedback_fn):
3941
    """Computes the list of nodes and their attributes.
3942

3943
    """
3944
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3945
    volumes = self.rpc.call_node_volumes(nodenames)
3946

    
3947
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3948
             in self.cfg.GetInstanceList()]
3949

    
3950
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3951

    
3952
    output = []
3953
    for node in nodenames:
3954
      nresult = volumes[node]
3955
      if nresult.offline:
3956
        continue
3957
      msg = nresult.fail_msg
3958
      if msg:
3959
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3960
        continue
3961

    
3962
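      # work on a copy of the volume list, sorted by physical device name,
      # so that the output order is stable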
      node_vols = nresult.payload[:]
3963
      node_vols.sort(key=lambda vol: vol['dev'])
3964

    
3965
      for vol in node_vols:
3966
        node_output = []
3967
        for field in self.op.output_fields:
3968
          if field == "node":
3969
            val = node
3970
          elif field == "phys":
3971
            val = vol['dev']
3972
          elif field == "vg":
3973
            val = vol['vg']
3974
          elif field == "name":
3975
            val = vol['name']
3976
          elif field == "size":
3977
            val = int(float(vol['size']))
3978
          elif field == "instance":
3979
            for inst in ilist:
3980
              if node not in lv_by_node[inst]:
3981
                continue
3982
              if vol['name'] in lv_by_node[inst][node]:
3983
                val = inst.name
3984
                break
3985
            else:
3986
              val = '-'
3987
          else:
3988
            raise errors.ParameterError(field)
3989
          node_output.append(str(val))
3990

    
3991
        output.append(node_output)
3992

    
3993
    return output
3994

    
3995

    
3996
class LUNodeQueryStorage(NoHooksLU):
3997
  """Logical unit for getting information on storage units on node(s).
3998

3999
  """
4000
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4001
  REQ_BGL = False
4002

    
4003
  def CheckArguments(self):
4004
    _CheckOutputFields(static=self._FIELDS_STATIC,
4005
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4006
                       selected=self.op.output_fields)
4007

    
4008
  def ExpandNames(self):
4009
    self.needed_locks = {}
4010
    self.share_locks[locking.LEVEL_NODE] = 1
4011

    
4012
    if self.op.nodes:
4013
      self.needed_locks[locking.LEVEL_NODE] = \
4014
        _GetWantedNodes(self, self.op.nodes)
4015
    else:
4016
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4017

    
4018
  def Exec(self, feedback_fn):
4019
    """Computes the list of nodes and their attributes.
4020

4021
    """
4022
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4023

    
4024
    # Always get name to sort by
4025
    if constants.SF_NAME in self.op.output_fields:
4026
      fields = self.op.output_fields[:]
4027
    else:
4028
      fields = [constants.SF_NAME] + self.op.output_fields
4029

    
4030
    # Never ask for node or type as it's only known to the LU
4031
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4032
      while extra in fields:
4033
        fields.remove(extra)
4034

    
4035
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4036
    name_idx = field_idx[constants.SF_NAME]
4037

    
4038
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4039
    data = self.rpc.call_storage_list(self.nodes,
4040
                                      self.op.storage_type, st_args,
4041
                                      self.op.name, fields)
4042

    
4043
    result = []
4044

    
4045
    for node in utils.NiceSort(self.nodes):
4046
      nresult = data[node]
4047
      if nresult.offline:
4048
        continue
4049

    
4050
      msg = nresult.fail_msg
4051
      if msg:
4052
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4053
        continue
4054

    
4055
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4056

    
4057
      for name in utils.NiceSort(rows.keys()):
4058
        row = rows[name]
4059

    
4060
        out = []
4061

    
4062
        for field in self.op.output_fields:
4063
          if field == constants.SF_NODE:
4064
            val = node
4065
          elif field == constants.SF_TYPE:
4066
            val = self.op.storage_type
4067
          elif field in field_idx:
4068
            val = row[field_idx[field]]
4069
          else:
4070
            raise errors.ParameterError(field)
4071

    
4072
          out.append(val)
4073

    
4074
        result.append(out)
4075

    
4076
    return result
4077

    
4078

    
4079
class _InstanceQuery(_QueryBase):
4080
  FIELDS = query.INSTANCE_FIELDS
4081

    
4082
  def ExpandNames(self, lu):
4083
    lu.needed_locks = {}
4084
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4085
    lu.share_locks[locking.LEVEL_NODE] = 1
4086

    
4087
    if self.names:
4088
      self.wanted = _GetWantedInstances(lu, self.names)
4089
    else:
4090
      self.wanted = locking.ALL_SET
4091

    
4092
    self.do_locking = (self.use_locking and
4093
                       query.IQ_LIVE in self.requested_data)
4094
    if self.do_locking:
4095
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4096
      lu.needed_locks[locking.LEVEL_NODE] = []
4097
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4098

    
4099
  def DeclareLocks(self, lu, level):
4100
    if level == locking.LEVEL_NODE and self.do_locking:
4101
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4102

    
4103
  def _GetQueryData(self, lu):
4104
    """Computes the list of instances and their attributes.
4105

4106
    """
4107
    cluster = lu.cfg.GetClusterInfo()
4108
    all_info = lu.cfg.GetAllInstancesInfo()
4109

    
4110
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4111

    
4112
    instance_list = [all_info[name] for name in instance_names]
4113
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4114
                                        for inst in instance_list)))
4115
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4116
    bad_nodes = []
4117
    offline_nodes = []
4118
    wrongnode_inst = set()
4119

    
4120
    # Gather data as requested
4121
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4122
      live_data = {}
4123
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4124
      for name in nodes:
4125
        result = node_data[name]
4126
        if result.offline:
4127
          # offline nodes will be in both lists
4128
          assert result.fail_msg
4129
          offline_nodes.append(name)
4130
        if result.fail_msg:
4131
          bad_nodes.append(name)
4132
        elif result.payload:
4133
          for inst in result.payload:
4134
            if inst in all_info:
4135
              if all_info[inst].primary_node == name:
4136
                live_data.update(result.payload)
4137
              else:
4138
                wrongnode_inst.add(inst)
4139
            else:
4140
              # orphan instance; we don't list it here as we don't
4141
              # handle this case yet in the output of instance listing
4142
              logging.warning("Orphan instance '%s' found on node %s",
4143
                              inst, name)
4144
        # else no instance is alive
4145
    else:
4146
      live_data = {}
4147

    
4148
    if query.IQ_DISKUSAGE in self.requested_data:
4149
      disk_usage = dict((inst.name,
4150
                         _ComputeDiskSize(inst.disk_template,
4151
                                          [{constants.IDISK_SIZE: disk.size}
4152
                                           for disk in inst.disks]))
4153
                        for inst in instance_list)
4154
    else:
4155
      disk_usage = None
4156

    
4157
    if query.IQ_CONSOLE in self.requested_data:
4158
      consinfo = {}
4159
      for inst in instance_list:
4160
        if inst.name in live_data:
4161
          # Instance is running
4162
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4163
        else:
4164
          consinfo[inst.name] = None
4165
      assert set(consinfo.keys()) == set(instance_names)
4166
    else:
4167
      consinfo = None
4168

    
4169
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4170
                                   disk_usage, offline_nodes, bad_nodes,
4171
                                   live_data, wrongnode_inst, consinfo)
4172

    
4173

    
4174
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)

class LUNodeModifyStorage(NoHooksLU):
4214
  """Logical unit for modifying a storage volume on a node.
4215

4216
  """
4217
  REQ_BGL = False
4218

    
4219
  def CheckArguments(self):
4220
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4221

    
4222
    storage_type = self.op.storage_type
4223

    
4224
    try:
4225
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4226
    except KeyError:
4227
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4228
                                 " modified" % storage_type,
4229
                                 errors.ECODE_INVAL)
4230

    
4231
    diff = set(self.op.changes.keys()) - modifiable
4232
    if diff:
4233
      raise errors.OpPrereqError("The following fields can not be modified for"
4234
                                 " storage units of type '%s': %r" %
4235
                                 (storage_type, list(diff)),
4236
                                 errors.ECODE_INVAL)
4237

    
4238
  def ExpandNames(self):
4239
    self.needed_locks = {
4240
      locking.LEVEL_NODE: self.op.node_name,
4241
      }
4242

    
4243
  def Exec(self, feedback_fn):
4244
    """Computes the list of nodes and their attributes.
4245

4246
    """
4247
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4248
    result = self.rpc.call_storage_modify(self.op.node_name,
4249
                                          self.op.storage_type, st_args,
4250
                                          self.op.name, self.op.changes)
4251
    result.Raise("Failed to modify storage unit '%s' on %s" %
4252
                 (self.op.name, self.op.node_name))
4253

    
4254

    
4255
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


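# Illustrative sketch for LUNodeAdd above (editorial example, not part of the
# original module): a plain node addition is driven by an opcode along the
# lines of
#   op = opcodes.OpNodeAdd(node_name="node3.example.com", readd=False,
#                          group="default", vm_capable=True)
# The node and group names are hypothetical; the field names follow the
# opcode definitions elsewhere in the tree and are not defined here.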
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the node's current state and the requested modifications.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


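# Illustrative note for LUNodeSetParams above (editorial example, not part of
# the original module): the flag/role mapping means that draining a regular
# node corresponds to
#   _F2R[(False, False, False)] == _ROLE_REGULAR   # current state
#   _R2F[_ROLE_DRAINED] == (False, True, False)    # flags after the change
# i.e. only the "drained" flag ends up set on the node object afterwards.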
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Powercycles the node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


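# Illustrative note for LUClusterQuery above (editorial example, not part of
# the original module): the os_hvp filtering keeps only enabled hypervisors;
# with, say,
#   cluster.os_hvp == {"debian-image": {"kvm": {...}, "fake": {...}}}
# and only "kvm" enabled, the reported os_hvp would be
#   {"debian-image": {"kvm": {...}}}
# The OS and hypervisor names shown here are just examples.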
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Collect and return the requested configuration values.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


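# Illustrative sketch for LUClusterConfigQuery above (editorial example, not
# part of the original module): requesting
#   output_fields = ["cluster_name", "drain_flag"]
# yields a list in the same order, e.g. ["cluster.example.com", False]; the
# cluster name shown is hypothetical.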
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info); disks_ok is False if the
      operation failed, otherwise device_info is a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


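# Illustrative sketch for _AssembleInstanceDisks above (editorial example, not
# part of the original module): for a single-disk DRBD instance a successful
# call returns something shaped like
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# where the node name and device path are hypothetical.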
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


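# Illustrative note for _ExpandCheckDisks above (editorial example, not part
# of the original module): passing disks=None selects all of instance.disks,
# while passing a subset such as [instance.disks[0]] restricts the caller to
# that disk only; any disk not belonging to the instance raises
# ProgrammerError.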
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are only ignored if ignore_primary is
  true; otherwise they cause the function to return False.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


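# Illustrative sketch for _CheckNodeFreeMemory above (editorial example, not
# part of the original module): a caller requesting 2048 MiB before starting
# an instance would do roughly
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        2048, instance.hypervisor)
# mirroring the call made from LUInstanceStartup.CheckPrereq below, where the
# amount comes from the instance's backend parameters rather than a literal.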
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


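# Illustrative sketch for _CheckNodesFreeDiskPerVG above (editorial example,
# not part of the original module): req_sizes maps volume group names to MiB,
# e.g.
#   req_sizes = {"xenvg": 10240}
# requires 10 GiB of free space in VG "xenvg" on every node in nodenames; the
# VG name is hypothetical (it is whatever the cluster actually uses).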
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


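# Illustrative note for LUInstanceStartup above (editorial example, not part
# of the original module): temporary parameter overrides are merged on top of
# the filled cluster/instance defaults only for this start, e.g. an opcode
# carrying something like
#   hvparams = {"kernel_args": "single"}
# is syntax-checked against the filled hypervisor parameters and passed to
# the start RPC, but it is not written back to the configuration. The
# parameter name shown is hypothetical and hypervisor-dependent.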
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


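# Illustrative note for LUInstanceReboot above (editorial example, not part of
# the original module): soft and hard reboots of a running instance are
# delegated to the hypervisor on the primary node, while a full reboot (or a
# reboot of a stopped instance) is implemented here as an explicit shutdown,
# including the disks, followed by a fresh start.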
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
5653
  """Recreate an instance's missing disks.
5654

5655
  """
5656
  HPATH = "instance-recreate-disks"
5657
  HTYPE = constants.HTYPE_INSTANCE
5658
  REQ_BGL = False
5659

    
5660
  def ExpandNames(self):
5661
    self._ExpandAndLockInstance()
5662

    
5663
  def BuildHooksEnv(self):
5664
    """Build hooks env.
5665

5666
    This runs on master, primary and secondary nodes of the instance.
5667

5668
    """
5669
    return _BuildInstanceHookEnvByObject(self, self.instance)
5670

    
5671
  def BuildHooksNodes(self):
5672
    """Build hooks nodes.
5673

5674
    """
5675
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5676
    return (nl, nl)
5677

    
5678
  def CheckPrereq(self):
5679
    """Check prerequisites.
5680

5681
    This checks that the instance is in the cluster and is not running.
5682

5683
    """
5684
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5685
    assert instance is not None, \
5686
      "Cannot retrieve locked instance %s" % self.op.instance_name
5687
    _CheckNodeOnline(self, instance.primary_node)
5688

    
5689
    if instance.disk_template == constants.DT_DISKLESS:
5690
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5691
                                 self.op.instance_name, errors.ECODE_INVAL)
5692
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5693

    
5694
    if not self.op.disks:
5695
      self.op.disks = range(len(instance.disks))
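      # an empty disk list means "recreate all of the instance's disks"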
5696
    else:
5697
      for idx in self.op.disks:
5698
        if idx >= len(instance.disks):
5699
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5700
                                     errors.ECODE_INVAL)
5701

    
5702
    self.instance = instance
5703

    
5704
  def Exec(self, feedback_fn):
5705
    """Recreate the disks.
5706

5707
    """
5708
    to_skip = []
5709
    for idx, _ in enumerate(self.instance.disks):
5710
      if idx not in self.op.disks: # disk idx has not been passed in
5711
        to_skip.append(idx)
5712
        continue
5713

    
5714
    _CreateDisks(self, self.instance, to_skip=to_skip)
5715

    
5716

    
5717
class LUInstanceRename(LogicalUnit):
5718
  """Rename an instance.
5719

5720
  """
5721
  HPATH = "instance-rename"
5722
  HTYPE = constants.HTYPE_INSTANCE
5723

    
5724
  def CheckArguments(self):
5725
    """Check arguments.
5726

5727
    """
5728
    if self.op.ip_check and not self.op.name_check:
5729
      # TODO: make the ip check more flexible and not depend on the name check
5730
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5731
                                 errors.ECODE_INVAL)
5732

    
5733
  def BuildHooksEnv(self):
5734
    """Build hooks env.
5735

5736
    This runs on master, primary and secondary nodes of the instance.
5737

5738
    """
5739
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5740
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5741
    return env
5742

    
5743
  def BuildHooksNodes(self):
5744
    """Build hooks nodes.
5745

5746
    """
5747
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5748
    return (nl, nl)
5749

    
5750
  def CheckPrereq(self):
5751
    """Check prerequisites.
5752

5753
    This checks that the instance is in the cluster and is not running.
5754

5755
    """
5756
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5757
                                                self.op.instance_name)
5758
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5759
    assert instance is not None
5760
    _CheckNodeOnline(self, instance.primary_node)
5761
    _CheckInstanceDown(self, instance, "cannot rename")
5762
    self.instance = instance
5763

    
5764
    new_name = self.op.new_name
5765
    if self.op.name_check:
5766
      hostname = netutils.GetHostname(name=new_name)
5767
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5768
                   hostname.name)
5769
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5770
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5771
                                    " same as given hostname '%s'") %
5772
                                    (hostname.name, self.op.new_name),
5773
                                    errors.ECODE_INVAL)
5774
      new_name = self.op.new_name = hostname.name
5775
      if (self.op.ip_check and
5776
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5777
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5778
                                   (hostname.ip, new_name),
5779
                                   errors.ECODE_NOTUNIQUE)
5780

    
5781
    instance_list = self.cfg.GetInstanceList()
5782
    if new_name in instance_list and new_name != instance.name:
5783
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5784
                                 new_name, errors.ECODE_EXISTS)
5785

    
5786
  def Exec(self, feedback_fn):
5787
    """Rename the instance.
5788

5789
    """
5790
    inst = self.instance
5791
    old_name = inst.name
5792

    
5793
    rename_file_storage = False
5794
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5795
        self.op.new_name != inst.name):
5796
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5797
      rename_file_storage = True
5798

    
5799
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5800
    # Change the instance lock. This is definitely safe while we hold the BGL
5801
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5802
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5803

    
5804
    # re-read the instance from the configuration after rename
5805
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5806

    
5807
    if rename_file_storage:
5808
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5809
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5810
                                                     old_file_storage_dir,
5811
                                                     new_file_storage_dir)
5812
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5813
                   " (but the instance has been renamed in Ganeti)" %
5814
                   (inst.primary_node, old_file_storage_dir,
5815
                    new_file_storage_dir))
5816

    
5817
    _StartInstanceDisks(self, inst, None)
5818
    try:
5819
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5820
                                                 old_name, self.op.debug_level)
5821
      msg = result.fail_msg
5822
      if msg:
5823
        msg = ("Could not run OS rename script for instance %s on node %s"
5824
               " (but the instance has been renamed in Ganeti): %s" %
5825
               (inst.name, inst.primary_node, msg))
5826
        self.proc.LogWarning(msg)
5827
    finally:
5828
      _ShutdownInstanceDisks(self, inst)
5829

    
5830
    return inst.name
5831

    
5832

    
5833
class LUInstanceRemove(LogicalUnit):
5834
  """Remove an instance.
5835

5836
  """
5837
  HPATH = "instance-remove"
5838
  HTYPE = constants.HTYPE_INSTANCE
5839
  REQ_BGL = False
5840

    
5841
  def ExpandNames(self):
5842
    self._ExpandAndLockInstance()
5843
    self.needed_locks[locking.LEVEL_NODE] = []
5844
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5845

    
5846
  def DeclareLocks(self, level):
5847
    if level == locking.LEVEL_NODE:
5848
      self._LockInstancesNodes()
5849

    
5850
  def BuildHooksEnv(self):
5851
    """Build hooks env.
5852

5853
    This runs on master, primary and secondary nodes of the instance.
5854

5855
    """
5856
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5857
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5858
    return env
5859

    
5860
  def BuildHooksNodes(self):
5861
    """Build hooks nodes.
5862

5863
    """
5864
    nl = [self.cfg.GetMasterNode()]
5865
    nl_post = list(self.instance.all_nodes) + nl
5866
    return (nl, nl_post)
5867

    
5868
  def CheckPrereq(self):
5869
    """Check prerequisites.
5870

5871
    This checks that the instance is in the cluster.
5872

5873
    """
5874
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5875
    assert self.instance is not None, \
5876
      "Cannot retrieve locked instance %s" % self.op.instance_name
5877

    
5878
  def Exec(self, feedback_fn):
5879
    """Remove the instance.
5880

5881
    """
5882
    instance = self.instance
5883
    logging.info("Shutting down instance %s on node %s",
5884
                 instance.name, instance.primary_node)
5885

    
5886
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5887
                                             self.op.shutdown_timeout)
5888
    msg = result.fail_msg
5889
    if msg:
5890
      if self.op.ignore_failures:
5891
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5892
      else:
5893
        raise errors.OpExecError("Could not shutdown instance %s on"
5894
                                 " node %s: %s" %
5895
                                 (instance.name, instance.primary_node, msg))
5896

    
5897
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5898

    
5899

    
5900
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5901
  """Utility function to remove an instance.
5902

5903
  """
5904
  logging.info("Removing block devices for instance %s", instance.name)
5905

    
5906
  if not _RemoveDisks(lu, instance):
5907
    if not ignore_failures:
5908
      raise errors.OpExecError("Can't remove instance's disks")
5909
    feedback_fn("Warning: can't remove instance's disks")
5910

    
5911
  logging.info("Removing instance %s out of cluster config", instance.name)
5912

    
5913
  lu.cfg.RemoveInstance(instance.name)
5914

    
5915
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5916
    "Instance lock removal conflict"
5917

    
5918
  # Remove lock for the instance
5919
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5920

    
5921

    
5922
class LUInstanceQuery(NoHooksLU):
5923
  """Logical unit for querying instances.
5924

5925
  """
5926
  # pylint: disable-msg=W0142
5927
  REQ_BGL = False
5928

    
5929
  def CheckArguments(self):
5930
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5931
                             self.op.output_fields, self.op.use_locking)
5932

    
5933
  def ExpandNames(self):
5934
    self.iq.ExpandNames(self)
5935

    
5936
  def DeclareLocks(self, level):
5937
    self.iq.DeclareLocks(self, level)
5938

    
5939
  def Exec(self, feedback_fn):
5940
    return self.iq.OldStyleQuery(self)
5941

    
5942

    
5943
class LUInstanceFailover(LogicalUnit):
5944
  """Failover an instance.
5945

5946
  """
5947
  HPATH = "instance-failover"
5948
  HTYPE = constants.HTYPE_INSTANCE
5949
  REQ_BGL = False
5950

    
5951
  def CheckArguments(self):
5952
    """Check the arguments.
5953

5954
    """
5955
    self.iallocator = getattr(self.op, "iallocator", None)
5956
    self.target_node = getattr(self.op, "target_node", None)
5957

    
5958
  def ExpandNames(self):
5959
    self._ExpandAndLockInstance()
5960

    
5961
    if self.op.target_node is not None:
5962
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5963

    
5964
    self.needed_locks[locking.LEVEL_NODE] = []
5965
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5966

    
5967
    ignore_consistency = self.op.ignore_consistency
5968
    shutdown_timeout = self.op.shutdown_timeout
5969
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5970
                                       cleanup=False,
5971
                                       iallocator=self.op.iallocator,
5972
                                       target_node=self.op.target_node,
5973
                                       failover=True,
5974
                                       ignore_consistency=ignore_consistency,
5975
                                       shutdown_timeout=shutdown_timeout)
5976
    self.tasklets = [self._migrater]
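    # with tasklets set, the framework delegates CheckPrereq and Exec to the
    # tasklet; this LU only provides locking and the hooks environment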
5977

    
5978
  def DeclareLocks(self, level):
5979
    if level == locking.LEVEL_NODE:
5980
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5981
      if instance.disk_template in constants.DTS_EXT_MIRROR:
5982
        if self.op.target_node is None:
5983
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5984
        else:
5985
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5986
                                                   self.op.target_node]
5987
        del self.recalculate_locks[locking.LEVEL_NODE]
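        # the node locks are now listed explicitly, so drop the recalculation
        # request registered in ExpandNames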
5988
      else:
5989
        self._LockInstancesNodes()
5990

    
5991
  def BuildHooksEnv(self):
5992
    """Build hooks env.
5993

5994
    This runs on master, primary and secondary nodes of the instance.
5995

5996
    """
5997
    instance = self._migrater.instance
5998
    source_node = instance.primary_node
5999
    target_node = self._migrater.target_node
6000
    env = {
6001
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6002
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6003
      "OLD_PRIMARY": source_node,
6004
      "NEW_PRIMARY": target_node,
6005
      }
6006

    
6007
    if instance.disk_template in constants.DTS_INT_MIRROR:
6008
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6009
      env["NEW_SECONDARY"] = source_node
6010
    else:
6011
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6012

    
6013
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6014

    
6015
    return env
6016

    
6017
  def BuildHooksNodes(self):
6018
    """Build hooks nodes.
6019

6020
    """
6021
    instance = self._migrater.instance
6022
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6023
    return (nl, nl + [instance.primary_node])
6024

    
6025

    
6026
class LUInstanceMigrate(LogicalUnit):
6027
  """Migrate an instance.
6028

6029
  This is migration without shutting down, compared to the failover,
6030
  which is done with shutdown.
6031

6032
  """
6033
  HPATH = "instance-migrate"
6034
  HTYPE = constants.HTYPE_INSTANCE
6035
  REQ_BGL = False
6036

    
6037
  def ExpandNames(self):
6038
    self._ExpandAndLockInstance()
6039

    
6040
    if self.op.target_node is not None:
6041
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6042

    
6043
    self.needed_locks[locking.LEVEL_NODE] = []
6044
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6045

    
6046
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6047
                                       cleanup=self.op.cleanup,
6048
                                       iallocator=self.op.iallocator,
6049
                                       target_node=self.op.target_node,
6050
                                       failover=False,
6051
                                       fallback=self.op.allow_failover)
6052
    self.tasklets = [self._migrater]
6053

    
6054
  def DeclareLocks(self, level):
6055
    if level == locking.LEVEL_NODE:
6056
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6057
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6058
        if self.op.target_node is None:
6059
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6060
        else:
6061
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6062
                                                   self.op.target_node]
6063
        del self.recalculate_locks[locking.LEVEL_NODE]
6064
      else:
6065
        self._LockInstancesNodes()
6066

    
6067
  def BuildHooksEnv(self):
6068
    """Build hooks env.
6069

6070
    This runs on master, primary and secondary nodes of the instance.
6071

6072
    """
6073
    instance = self._migrater.instance
6074
    source_node = instance.primary_node
6075
    target_node = self._migrater.target_node
6076
    env = _BuildInstanceHookEnvByObject(self, instance)
6077
    env.update({
6078
      "MIGRATE_LIVE": self._migrater.live,
6079
      "MIGRATE_CLEANUP": self.op.cleanup,
6080
      "OLD_PRIMARY": source_node,
6081
      "NEW_PRIMARY": target_node,
6082
      })
6083

    
6084
    if instance.disk_template in constants.DTS_INT_MIRROR:
6085
      env["OLD_SECONDARY"] = target_node
6086
      env["NEW_SECONDARY"] = source_node
6087
    else:
6088
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6089

    
6090
    return env
6091

    
6092
  def BuildHooksNodes(self):
6093
    """Build hooks nodes.
6094

6095
    """
6096
    instance = self._migrater.instance
6097
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6098
    return (nl, nl + [instance.primary_node])
6099

    
6100

    
6101
class LUInstanceMove(LogicalUnit):
6102
  """Move an instance by data-copying.
6103

6104
  """
6105
  HPATH = "instance-move"
6106
  HTYPE = constants.HTYPE_INSTANCE
6107
  REQ_BGL = False
6108

    
6109
  def ExpandNames(self):
6110
    self._ExpandAndLockInstance()
6111
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6112
    self.op.target_node = target_node
6113
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6114
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
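    # LOCKS_APPEND keeps the explicit target node lock and lets DeclareLocks
    # add the instance's primary node to it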
6115

    
6116
  def DeclareLocks(self, level):
6117
    if level == locking.LEVEL_NODE:
6118
      self._LockInstancesNodes(primary_only=True)
6119

    
6120
  def BuildHooksEnv(self):
6121
    """Build hooks env.
6122

6123
    This runs on master, primary and secondary nodes of the instance.
6124

6125
    """
6126
    env = {
6127
      "TARGET_NODE": self.op.target_node,
6128
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6129
      }
6130
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6131
    return env
6132

    
6133
  def BuildHooksNodes(self):
6134
    """Build hooks nodes.
6135

6136
    """
6137
    nl = [
6138
      self.cfg.GetMasterNode(),
6139
      self.instance.primary_node,
6140
      self.op.target_node,
6141
      ]
6142
    return (nl, nl)
6143

    
6144
  def CheckPrereq(self):
6145
    """Check prerequisites.
6146

6147
    This checks that the instance is in the cluster.
6148

6149
    """
6150
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6151
    assert self.instance is not None, \
6152
      "Cannot retrieve locked instance %s" % self.op.instance_name
6153

    
6154
    node = self.cfg.GetNodeInfo(self.op.target_node)
6155
    assert node is not None, \
6156
      "Cannot retrieve locked node %s" % self.op.target_node
6157

    
6158
    self.target_node = target_node = node.name
6159

    
6160
    if target_node == instance.primary_node:
6161
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6162
                                 (instance.name, target_node),
6163
                                 errors.ECODE_STATE)
6164

    
6165
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6166

    
6167
    for idx, dsk in enumerate(instance.disks):
6168
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6169
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6170
                                   " cannot copy" % idx, errors.ECODE_STATE)
6171

    
6172
    _CheckNodeOnline(self, target_node)
6173
    _CheckNodeNotDrained(self, target_node)
6174
    _CheckNodeVmCapable(self, target_node)
6175

    
6176
    if instance.admin_up:
6177
      # check memory requirements on the target node
6178
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6179
                           instance.name, bep[constants.BE_MEMORY],
6180
                           instance.hypervisor)
6181
    else:
6182
      self.LogInfo("Not checking memory on the secondary node as"
6183
                   " instance will not be started")
6184

    
6185
    # check bridge existance
6186
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6187

    
6188
  def Exec(self, feedback_fn):
6189
    """Move an instance.
6190

6191
    The move is done by shutting it down on its present node, copying
6192
    the data over (slow) and starting it on the new node.
6193

6194
    """
6195
    instance = self.instance
6196

    
6197
    source_node = instance.primary_node
6198
    target_node = self.target_node
6199

    
6200
    self.LogInfo("Shutting down instance %s on source node %s",
6201
                 instance.name, source_node)
6202

    
6203
    result = self.rpc.call_instance_shutdown(source_node, instance,
6204
                                             self.op.shutdown_timeout)
6205
    msg = result.fail_msg
6206
    if msg:
6207
      if self.op.ignore_consistency:
6208
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6209
                             " Proceeding anyway. Please make sure node"
6210
                             " %s is down. Error details: %s",
6211
                             instance.name, source_node, source_node, msg)
6212
      else:
6213
        raise errors.OpExecError("Could not shutdown instance %s on"
6214
                                 " node %s: %s" %
6215
                                 (instance.name, source_node, msg))
6216

    
6217
    # create the target disks
6218
    try:
6219
      _CreateDisks(self, instance, target_node=target_node)
6220
    except errors.OpExecError:
6221
      self.LogWarning("Device creation failed, reverting...")
6222
      try:
6223
        _RemoveDisks(self, instance, target_node=target_node)
6224
      finally:
6225
        self.cfg.ReleaseDRBDMinors(instance.name)
6226
        raise
6227

    
6228
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6229

    
6230
    errs = []
6231
    # activate, get path, copy the data over
6232
    for idx, disk in enumerate(instance.disks):
6233
      self.LogInfo("Copying data for disk %d", idx)
6234
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6235
                                               instance.name, True, idx)
6236
      if result.fail_msg:
6237
        self.LogWarning("Can't assemble newly created disk %d: %s",
6238
                        idx, result.fail_msg)
6239
        errs.append(result.fail_msg)
6240
        break
6241
      dev_path = result.payload
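      # dev_path is the path of the newly assembled device on the target node;
      # the export call below copies the disk contents from the source node
      # straight into it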
6242
      result = self.rpc.call_blockdev_export(source_node, disk,
6243
                                             target_node, dev_path,
6244
                                             cluster_name)
6245
      if result.fail_msg:
6246
        self.LogWarning("Can't copy data over for disk %d: %s",
6247
                        idx, result.fail_msg)
6248
        errs.append(result.fail_msg)
6249
        break
6250

    
6251
    if errs:
6252
      self.LogWarning("Some disks failed to copy, aborting")
6253
      try:
6254
        _RemoveDisks(self, instance, target_node=target_node)
6255
      finally:
6256
        self.cfg.ReleaseDRBDMinors(instance.name)
6257
        raise errors.OpExecError("Errors during disk copy: %s" %
6258
                                 (",".join(errs),))
6259

    
6260
    instance.primary_node = target_node
6261
    self.cfg.Update(instance, feedback_fn)
6262

    
6263
    self.LogInfo("Removing the disks on the original node")
6264
    _RemoveDisks(self, instance, target_node=source_node)
6265

    
6266
    # Only start the instance if it's marked as up
6267
    if instance.admin_up:
6268
      self.LogInfo("Starting instance %s on node %s",
6269
                   instance.name, target_node)
6270

    
6271
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6272
                                           ignore_secondaries=True)
6273
      if not disks_ok:
6274
        _ShutdownInstanceDisks(self, instance)
6275
        raise errors.OpExecError("Can't activate the instance's disks")
6276

    
6277
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6278
      msg = result.fail_msg
6279
      if msg:
6280
        _ShutdownInstanceDisks(self, instance)
6281
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6282
                                 (instance.name, target_node, msg))
6283

    
6284

    
6285
class LUNodeMigrate(LogicalUnit):
6286
  """Migrate all instances from a node.
6287

6288
  """
6289
  HPATH = "node-migrate"
6290
  HTYPE = constants.HTYPE_NODE
6291
  REQ_BGL = False
6292

    
6293
  def CheckArguments(self):
6294
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6295

    
6296
  def ExpandNames(self):
6297
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6298

    
6299
    self.needed_locks = {}
6300

    
6301
    # Create tasklets for migrating instances for all instances on this node
6302
    names = []
6303
    tasklets = []
6304

    
6305
    self.lock_all_nodes = False
6306

    
6307
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6308
      logging.debug("Migrating instance %s", inst.name)
6309
      names.append(inst.name)
6310

    
6311
      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6312
                                        iallocator=self.op.iallocator,
6313
                                        target_node=None))
6314

    
6315
      if inst.disk_template in constants.DTS_EXT_MIRROR:
6316
        # We need to lock all nodes, as the iallocator will choose the
6317
        # destination nodes afterwards
6318
        self.lock_all_nodes = True
6319

    
6320
    self.tasklets = tasklets
6321

    
6322
    # Declare node locks
6323
    if self.lock_all_nodes:
6324
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6325
    else:
6326
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6327
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6328

    
6329
    # Declare instance locks
6330
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6331

    
6332
  def DeclareLocks(self, level):
6333
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6334
      self._LockInstancesNodes()
6335

    
6336
  def BuildHooksEnv(self):
6337
    """Build hooks env.
6338

6339
    This runs on the master, the primary and all the secondaries.
6340

6341
    """
6342
    return {
6343
      "NODE_NAME": self.op.node_name,
6344
      }
6345

    
6346
  def BuildHooksNodes(self):
6347
    """Build hooks nodes.
6348

6349
    """
6350
    nl = [self.cfg.GetMasterNode()]
6351
    return (nl, nl)
6352

    
6353

    
6354
class TLMigrateInstance(Tasklet):
6355
  """Tasklet class for instance migration.
6356

6357
  @type live: boolean
6358
  @ivar live: whether the migration will be done live or non-live;
6359
      this variable is initialized only after CheckPrereq has run
6360
  @type cleanup: boolean
6361
  @ivar cleanup: Whether we clean up from a failed migration
6362
  @type iallocator: string
6363
  @ivar iallocator: The iallocator used to determine target_node
6364
  @type target_node: string
6365
  @ivar target_node: If given, the target_node to reallocate the instance to
6366
  @type failover: boolean
6367
  @ivar failover: Whether operation results in failover or migration
6368
  @type fallback: boolean
6369
  @ivar fallback: Whether fallback to failover is allowed if migration is not
6370
                  possible
6371
  @type ignore_consistency: boolean
6372
  @ivar ignore_consistency: Whether we should ignore consistency between source
6373
                            and target node
6374
  @type shutdown_timeout: int
6375
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
6376

6377
  """
6378
  def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6379
               target_node=None, failover=False, fallback=False,
6380
               ignore_consistency=False,
6381
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6382
    """Initializes this class.
6383

6384
    """
6385
    Tasklet.__init__(self, lu)
6386

    
6387
    # Parameters
6388
    self.instance_name = instance_name
6389
    self.cleanup = cleanup
6390
    self.live = False # will be overridden later
6391
    self.iallocator = iallocator
6392
    self.target_node = target_node
6393
    self.failover = failover
6394
    self.fallback = fallback
6395
    self.ignore_consistency = ignore_consistency
6396
    self.shutdown_timeout = shutdown_timeout
6397

    
6398
  def CheckPrereq(self):
6399
    """Check prerequisites.
6400

6401
    This checks that the instance is in the cluster.
6402

6403
    """
6404
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6405
    instance = self.cfg.GetInstanceInfo(instance_name)
6406
    assert instance is not None
6407
    self.instance = instance
6408

    
6409
    if (not self.cleanup and not instance.admin_up and not self.failover and
6410
        self.fallback):
6411
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6412
                      " to failover")
6413
      self.failover = True
6414

    
6415
    if instance.disk_template not in constants.DTS_MIRRORED:
6416
      if self.failover:
6417
        text = "failovers"
6418
      else:
6419
        text = "migrations"
6420
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6421
                                 " %s" % (instance.disk_template, text),
6422
                                 errors.ECODE_STATE)
6423

    
6424
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6425
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6426

    
6427
      if self.iallocator:
6428
        self._RunAllocator()
6429

    
6430
      # self.target_node is already populated, either directly or by the
6431
      # iallocator run
6432
      target_node = self.target_node
6433

    
6434
      if len(self.lu.tasklets) == 1:
6435
        # It is safe to remove locks only when we're the only tasklet in the LU
6436
        nodes_keep = [instance.primary_node, self.target_node]
6437
        nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6438
                     if node not in nodes_keep]
6439
        self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6440
        self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6441

    
6442
    else:
6443
      secondary_nodes = instance.secondary_nodes
6444
      if not secondary_nodes:
6445
        raise errors.ConfigurationError("No secondary node but using"
6446
                                        " %s disk template" %
6447
                                        instance.disk_template)
6448
      target_node = secondary_nodes[0]
6449
      if self.iallocator or (self.target_node and
6450
                             self.target_node != target_node):
6451
        if self.failover:
6452
          text = "failed over"
6453
        else:
6454
          text = "migrated"
6455
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6456
                                   " be %s over to arbitrary nodes"
6457
                                   " (neither an iallocator nor a target"
6458
                                   " node can be passed)" %
6459
                                   (instance.disk_template, text),
6460
                                   errors.ECODE_INVAL)
6461

    
6462
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6463

    
6464
    # check memory requirements on the secondary node
6465
    if not self.failover or instance.admin_up:
6466
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6467
                           instance.name, i_be[constants.BE_MEMORY],
6468
                           instance.hypervisor)
6469
    else:
6470
      self.lu.LogInfo("Not checking memory on the secondary node as"
6471
                      " instance will not be started")
6472

    
6473
    # check bridge existance
6474
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6475

    
6476
    if not self.cleanup:
6477
      _CheckNodeNotDrained(self.lu, target_node)
6478
      if not self.failover:
6479
        result = self.rpc.call_instance_migratable(instance.primary_node,
6480
                                                   instance)
6481
        if result.fail_msg and self.fallback:
6482
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6483
                          " failover")
6484
          self.failover = True
6485
        else:
6486
          result.Raise("Can't migrate, please use failover",
6487
                       prereq=True, ecode=errors.ECODE_STATE)
6488

    
6489
    assert not (self.failover and self.cleanup)
6490

    
6491
  def _RunAllocator(self):
6492
    """Run the allocator based on input opcode.
6493

6494
    """
6495
    ial = IAllocator(self.cfg, self.rpc,
6496
                     mode=constants.IALLOCATOR_MODE_RELOC,
6497
                     name=self.instance_name,
6498
                     # TODO See why hail breaks with a single node below
6499
                     relocate_from=[self.instance.primary_node,
6500
                                    self.instance.primary_node],
6501
                     )
6502

    
6503
    ial.Run(self.iallocator)
6504

    
6505
    if not ial.success:
6506
      raise errors.OpPrereqError("Can't compute nodes using"
6507
                                 " iallocator '%s': %s" %
6508
                                 (self.iallocator, ial.info),
6509
                                 errors.ECODE_NORES)
6510
    if len(ial.result) != ial.required_nodes:
6511
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6512
                                 " of nodes (%s), required %s" %
6513
                                 (self.iallocator, len(ial.result),
6514
                                  ial.required_nodes), errors.ECODE_FAULT)
6515
    self.target_node = ial.result[0]
6516
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6517
                 self.instance_name, self.iallocator,
6518
                 utils.CommaJoin(ial.result))
6519

    
6520
    if not self.failover:
6521
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6522
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6523
                                   " parameters are accepted",
6524
                                   errors.ECODE_INVAL)
6525
      if self.lu.op.live is not None:
6526
        if self.lu.op.live:
6527
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6528
        else:
6529
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6530
        # reset the 'live' parameter to None so that repeated
6531
        # invocations of CheckPrereq do not raise an exception
6532
        self.lu.op.live = None
6533
      elif self.lu.op.mode is None:
6534
        # read the default value from the hypervisor
6535
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6536
                                                skip_globals=False)
6537
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6538

    
6539
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6540
    else:
6541
      # Failover is never live
6542
      self.live = False
6543

    
6544
  def _WaitUntilSync(self):
6545
    """Poll with custom rpc for disk sync.
6546

6547
    This uses our own step-based rpc call.
6548

6549
    """
6550
    self.feedback_fn("* wait until resync is done")
6551
    all_done = False
6552
    while not all_done:
6553
      all_done = True
6554
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6555
                                            self.nodes_ip,
6556
                                            self.instance.disks)
6557
      min_percent = 100
6558
      for node, nres in result.items():
6559
        nres.Raise("Cannot resync disks on node %s" % node)
6560
        node_done, node_percent = nres.payload
6561
        all_done = all_done and node_done
6562
        if node_percent is not None:
6563
          min_percent = min(min_percent, node_percent)
6564
      if not all_done:
6565
        if min_percent < 100:
6566
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6567
        time.sleep(2)
6568

    
6569
  def _EnsureSecondary(self, node):
6570
    """Demote a node to secondary.
6571

6572
    """
6573
    self.feedback_fn("* switching node %s to secondary mode" % node)
6574

    
6575
    for dev in self.instance.disks:
6576
      self.cfg.SetDiskID(dev, node)
6577

    
6578
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6579
                                          self.instance.disks)
6580
    result.Raise("Cannot change disk to secondary on node %s" % node)
6581

    
6582
  def _GoStandalone(self):
6583
    """Disconnect from the network.
6584

6585
    """
6586
    self.feedback_fn("* changing into standalone mode")
6587
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6588
                                               self.instance.disks)
6589
    for node, nres in result.items():
6590
      nres.Raise("Cannot disconnect disks node %s" % node)
6591

    
6592
  def _GoReconnect(self, multimaster):
6593
    """Reconnect to the network.
6594

6595
    """
6596
    if multimaster:
6597
      msg = "dual-master"
6598
    else:
6599
      msg = "single-master"
6600
    self.feedback_fn("* changing disks into %s mode" % msg)
6601
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6602
                                           self.instance.disks,
6603
                                           self.instance.name, multimaster)
6604
    for node, nres in result.items():
6605
      nres.Raise("Cannot change disks config on node %s" % node)
6606

    
6607
  def _ExecCleanup(self):
6608
    """Try to cleanup after a failed migration.
6609

6610
    The cleanup is done by:
6611
      - check that the instance is running only on one node
6612
        (and update the config if needed)
6613
      - change disks on its secondary node to secondary
6614
      - wait until disks are fully synchronized
6615
      - disconnect from the network
6616
      - change disks into single-master mode
6617
      - wait again until disks are fully synchronized
6618

6619
    """
6620
    instance = self.instance
6621
    target_node = self.target_node
6622
    source_node = self.source_node
6623

    
6624
    # check running on only one node
6625
    self.feedback_fn("* checking where the instance actually runs"
6626
                     " (if this hangs, the hypervisor might be in"
6627
                     " a bad state)")
6628
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6629
    for node, result in ins_l.items():
6630
      result.Raise("Can't contact node %s" % node)
6631

    
6632
    runningon_source = instance.name in ins_l[source_node].payload
6633
    runningon_target = instance.name in ins_l[target_node].payload
6634

    
6635
    if runningon_source and runningon_target:
6636
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6637
                               " or the hypervisor is confused. You will have"
6638
                               " to ensure manually that it runs only on one"
6639
                               " and restart this operation.")
6640

    
6641
    if not (runningon_source or runningon_target):
6642
      raise errors.OpExecError("Instance does not seem to be running at all."
6643
                               " In this case, it's safer to repair by"
6644
                               " running 'gnt-instance stop' to ensure disk"
6645
                               " shutdown, and then restarting it.")
6646

    
6647
    if runningon_target:
6648
      # the migration has actually succeeded, we need to update the config
6649
      self.feedback_fn("* instance running on secondary node (%s),"
6650
                       " updating config" % target_node)
6651
      instance.primary_node = target_node
6652
      self.cfg.Update(instance, self.feedback_fn)
6653
      demoted_node = source_node
6654
    else:
6655
      self.feedback_fn("* instance confirmed to be running on its"
6656
                       " primary node (%s)" % source_node)
6657
      demoted_node = target_node
6658

    
6659
    if instance.disk_template in constants.DTS_INT_MIRROR:
6660
      self._EnsureSecondary(demoted_node)
6661
      try:
6662
        self._WaitUntilSync()
6663
      except errors.OpExecError:
6664
        # we ignore here errors, since if the device is standalone, it
6665
        # won't be able to sync
6666
        pass
6667
      self._GoStandalone()
6668
      self._GoReconnect(False)
6669
      self._WaitUntilSync()
6670

    
6671
    self.feedback_fn("* done")
6672

    
6673
  def _RevertDiskStatus(self):
6674
    """Try to revert the disk status after a failed migration.
6675

6676
    """
6677
    target_node = self.target_node
6678
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6679
      return
6680

    
6681
    try:
6682
      self._EnsureSecondary(target_node)
6683
      self._GoStandalone()
6684
      self._GoReconnect(False)
6685
      self._WaitUntilSync()
6686
    except errors.OpExecError, err:
6687
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6688
                         " drives: error '%s'\n"
6689
                         "Please look and recover the instance status" %
6690
                         str(err))
6691

    
6692
  def _AbortMigration(self):
6693
    """Call the hypervisor code to abort a started migration.
6694

6695
    """
6696
    instance = self.instance
6697
    target_node = self.target_node
6698
    migration_info = self.migration_info
6699

    
6700
    abort_result = self.rpc.call_finalize_migration(target_node,
6701
                                                    instance,
6702
                                                    migration_info,
6703
                                                    False)
6704
    abort_msg = abort_result.fail_msg
6705
    if abort_msg:
6706
      logging.error("Aborting migration failed on target node %s: %s",
6707
                    target_node, abort_msg)
6708
      # Don't raise an exception here, as we still have to try to revert the
6709
      # disk status, even if this step failed.
6710

    
6711
  def _ExecMigration(self):
6712
    """Migrate an instance.
6713

6714
    The migrate is done by:
6715
      - change the disks into dual-master mode
6716
      - wait until disks are fully synchronized again
6717
      - migrate the instance
6718
      - change disks on the new secondary node (the old primary) to secondary
6719
      - wait until disks are fully synchronized
6720
      - change disks into single-master mode
6721

6722
    """
6723
    instance = self.instance
6724
    target_node = self.target_node
6725
    source_node = self.source_node
6726

    
6727
    self.feedback_fn("* checking disk consistency between source and target")
6728
    for dev in instance.disks:
6729
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6730
        raise errors.OpExecError("Disk %s is degraded or not fully"
6731
                                 " synchronized on target node,"
6732
                                 " aborting migrate." % dev.iv_name)
6733

    
6734
    # First get the migration information from the remote node
6735
    result = self.rpc.call_migration_info(source_node, instance)
6736
    msg = result.fail_msg
6737
    if msg:
6738
      log_err = ("Failed fetching source migration information from %s: %s" %
6739
                 (source_node, msg))
6740
      logging.error(log_err)
6741
      raise errors.OpExecError(log_err)
6742

    
6743
    self.migration_info = migration_info = result.payload
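    # migration_info is opaque, hypervisor-specific data; it is handed back to
    # the nodes in call_accept_instance and call_finalize_migration below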
6744

    
6745
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6746
      # Then switch the disks to master/master mode
6747
      self._EnsureSecondary(target_node)
6748
      self._GoStandalone()
6749
      self._GoReconnect(True)
6750
      self._WaitUntilSync()
6751

    
6752
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6753
    result = self.rpc.call_accept_instance(target_node,
6754
                                           instance,
6755
                                           migration_info,
6756
                                           self.nodes_ip[target_node])
6757

    
6758
    msg = result.fail_msg
6759
    if msg:
6760
      logging.error("Instance pre-migration failed, trying to revert"
6761
                    " disk status: %s", msg)
6762
      self.feedback_fn("Pre-migration failed, aborting")
6763
      self._AbortMigration()
6764
      self._RevertDiskStatus()
6765
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6766
                               (instance.name, msg))
6767

    
6768
    self.feedback_fn("* migrating instance to %s" % target_node)
6769
    result = self.rpc.call_instance_migrate(source_node, instance,
6770
                                            self.nodes_ip[target_node],
6771
                                            self.live)
6772
    msg = result.fail_msg
6773
    if msg:
6774
      logging.error("Instance migration failed, trying to revert"
6775
                    " disk status: %s", msg)
6776
      self.feedback_fn("Migration failed, aborting")
6777
      self._AbortMigration()
6778
      self._RevertDiskStatus()
6779
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6780
                               (instance.name, msg))
6781

    
6782
    instance.primary_node = target_node
6783
    # distribute new instance config to the other nodes
6784
    self.cfg.Update(instance, self.feedback_fn)
6785

    
6786
    result = self.rpc.call_finalize_migration(target_node,
6787
                                              instance,
6788
                                              migration_info,
6789
                                              True)
6790
    msg = result.fail_msg
6791
    if msg:
6792
      logging.error("Instance migration succeeded, but finalization failed:"
6793
                    " %s", msg)
6794
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6795
                               msg)
6796

    
6797
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6798
      self._EnsureSecondary(source_node)
6799
      self._WaitUntilSync()
6800
      self._GoStandalone()
6801
      self._GoReconnect(False)
6802
      self._WaitUntilSync()
6803

    
6804
    self.feedback_fn("* done")
6805

    
6806
  def _ExecFailover(self):
6807
    """Failover an instance.
6808

6809
    The failover is done by shutting it down on its present node and
6810
    starting it on the secondary.
6811

6812
    """
6813
    instance = self.instance
6814
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6815

    
6816
    source_node = instance.primary_node
6817
    target_node = self.target_node
6818

    
6819
    if instance.admin_up:
6820
      self.feedback_fn("* checking disk consistency between source and target")
6821
      for dev in instance.disks:
6822
        # for drbd, these are drbd over lvm
6823
        if not _CheckDiskConsistency(self, dev, target_node, False):
6824
          if not self.ignore_consistency:
6825
            raise errors.OpExecError("Disk %s is degraded on target node,"
6826
                                     " aborting failover." % dev.iv_name)
6827
    else:
6828
      self.feedback_fn("* not checking disk consistency as instance is not"
6829
                       " running")
6830

    
6831
    self.feedback_fn("* shutting down instance on source node")
6832
    logging.info("Shutting down instance %s on node %s",
6833
                 instance.name, source_node)
6834

    
6835
    result = self.rpc.call_instance_shutdown(source_node, instance,
6836
                                             self.shutdown_timeout)
6837
    msg = result.fail_msg
6838
    if msg:
6839
      if self.ignore_consistency or primary_node.offline:
6840
        self.lu.LogWarning("Could not shutdown instance %s on node %s."
6841
                           " Proceeding anyway. Please make sure node"
6842
                           " %s is down. Error details: %s",
6843
                           instance.name, source_node, source_node, msg)
6844
      else:
6845
        raise errors.OpExecError("Could not shutdown instance %s on"
6846
                                 " node %s: %s" %
6847
                                 (instance.name, source_node, msg))
6848

    
6849
    self.feedback_fn("* deactivating the instance's disks on source node")
6850
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6851
      raise errors.OpExecError("Can't shut down the instance's disks.")
6852

    
6853
    instance.primary_node = target_node
6854
    # distribute new instance config to the other nodes
6855
    self.cfg.Update(instance, self.feedback_fn)
6856

    
6857
    # Only start the instance if it's marked as up
6858
    if instance.admin_up:
6859
      self.feedback_fn("* activating the instance's disks on target node")
6860
      logging.info("Starting instance %s on node %s",
6861
                   instance.name, target_node)
6862

    
6863
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6864
                                           ignore_secondaries=True)
6865
      if not disks_ok:
6866
        _ShutdownInstanceDisks(self, instance)
6867
        raise errors.OpExecError("Can't activate the instance's disks")
6868

    
6869
      self.feedback_fn("* starting the instance on the target node")
6870
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6871
      msg = result.fail_msg
6872
      if msg:
6873
        _ShutdownInstanceDisks(self, instance)
6874
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6875
                                 (instance.name, target_node, msg))
6876

    
6877
  def Exec(self, feedback_fn):
6878
    """Perform the migration.
6879

6880
    """
6881
    self.feedback_fn = feedback_fn
6882
    self.source_node = self.instance.primary_node
6883

    
6884
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6885
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
6886
      self.target_node = self.instance.secondary_nodes[0]
6887
      # Otherwise self.target_node has been populated either
6888
      # directly, or through an iallocator.
6889

    
6890
    self.all_nodes = [self.source_node, self.target_node]
6891
    self.nodes_ip = {
6892
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6893
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6894
      }
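    # the DRBD and migration RPCs below address the nodes via their secondary
    # IPs, hence the mapping built from secondary_ip above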
6895

    
6896
    if self.failover:
6897
      feedback_fn("Failover instance %s" % self.instance.name)
6898
      self._ExecFailover()
6899
    else:
6900
      feedback_fn("Migrating instance %s" % self.instance.name)
6901

    
6902
      if self.cleanup:
6903
        return self._ExecCleanup()
6904
      else:
6905
        return self._ExecMigration()
6906

    
6907

    
6908
def _CreateBlockDev(lu, node, instance, device, force_create,
6909
                    info, force_open):
6910
  """Create a tree of block devices on a given node.
6911

6912
  If this device type has to be created on secondaries, create it and
6913
  all its children.
6914

6915
  If not, just recurse to children keeping the same 'force' value.
6916

6917
  @param lu: the lu on whose behalf we execute
6918
  @param node: the node on which to create the device
6919
  @type instance: L{objects.Instance}
6920
  @param instance: the instance which owns the device
6921
  @type device: L{objects.Disk}
6922
  @param device: the device to create
6923
  @type force_create: boolean
6924
  @param force_create: whether to force creation of this device; this
6925
      will be changed to True whenever we find a device whose
6926
      CreateOnSecondary() method returns True
6927
  @param info: the extra 'metadata' we should attach to the device
6928
      (this will be represented as a LVM tag)
6929
  @type force_open: boolean
6930
  @param force_open: this parameter will be passed to the
6931
      L{backend.BlockdevCreate} function where it specifies
6932
      whether we run on primary or not, and it affects both
6933
      the child assembly and the device's own Open() execution
6934

6935
  """
6936
  if device.CreateOnSecondary():
6937
    force_create = True
6938

    
6939
  if device.children:
6940
    for child in device.children:
6941
      _CreateBlockDev(lu, node, instance, child, force_create,
6942
                      info, force_open)
6943

    
6944
  if not force_create:
6945
    return
6946

    
6947
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6948

    
6949

    
6950
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6951
  """Create a single block device on a given node.
6952

6953
  This will not recurse over children of the device, so they must be
6954
  created in advance.
6955

6956
  @param lu: the lu on whose behalf we execute
6957
  @param node: the node on which to create the device
6958
  @type instance: L{objects.Instance}
6959
  @param instance: the instance which owns the device
6960
  @type device: L{objects.Disk}
6961
  @param device: the device to create
6962
  @param info: the extra 'metadata' we should attach to the device
6963
      (this will be represented as a LVM tag)
6964
  @type force_open: boolean
6965
  @param force_open: this parameter will be passes to the
6966
      L{backend.BlockdevCreate} function where it specifies
6967
      whether we run on primary or not, and it affects both
6968
      the child assembly and the device own Open() execution
6969

6970
  """
6971
  lu.cfg.SetDiskID(device, node)
6972
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6973
                                       instance.name, force_open, info)
6974
  result.Raise("Can't create block device %s on"
6975
               " node %s for instance %s" % (device, node, instance.name))
6976
  if device.physical_id is None:
6977
    device.physical_id = result.payload
6978

    
6979

    
6980
def _GenerateUniqueNames(lu, exts):
6981
  """Generate a suitable LV name.
6982

6983
  This will generate a logical volume name for the given instance.
6984

6985
  """
6986
  results = []
6987
  for val in exts:
6988
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6989
    results.append("%s%s" % (new_id, val))
6990
  return results
6991

    
6992

    
6993
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6994
                         p_minor, s_minor):
6995
  """Generate a drbd8 device complete with its children.
6996

6997
  """
6998
  port = lu.cfg.AllocatePort()
6999
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7000
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7001
                          logical_id=(vgname, names[0]))
7002
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7003
                          logical_id=(vgname, names[1]))
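  # dev_data carries the instance data at the requested size; dev_meta is the
  # small fixed-size (128 MiB) volume holding the DRBD metadata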
7004
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7005
                          logical_id=(primary, secondary, port,
7006
                                      p_minor, s_minor,
7007
                                      shared_secret),
7008
                          children=[dev_data, dev_meta],
7009
                          iv_name=iv_name)
7010
  return drbd_dev
7011

    
7012

    
7013
def _GenerateDiskTemplate(lu, template_name,
7014
                          instance_name, primary_node,
7015
                          secondary_nodes, disk_info,
7016
                          file_storage_dir, file_driver,
7017
                          base_index, feedback_fn):
7018
  """Generate the entire disk layout for a given template type.
7019

7020
  """
7021
  #TODO: compute space requirements
7022

    
7023
  vgname = lu.cfg.GetVGName()
7024
  disk_count = len(disk_info)
7025
  disks = []
7026
  if template_name == constants.DT_DISKLESS:
7027
    pass
7028
  elif template_name == constants.DT_PLAIN:
7029
    if len(secondary_nodes) != 0:
7030
      raise errors.ProgrammerError("Wrong template configuration")
7031

    
7032
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7033
                                      for i in range(disk_count)])
7034
    for idx, disk in enumerate(disk_info):
7035
      disk_index = idx + base_index
7036
      vg = disk.get(constants.IDISK_VG, vgname)
7037
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7038
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7039
                              size=disk[constants.IDISK_SIZE],
7040
                              logical_id=(vg, names[idx]),
7041
                              iv_name="disk/%d" % disk_index,
7042
                              mode=disk[constants.IDISK_MODE])
7043
      disks.append(disk_dev)
7044
  elif template_name == constants.DT_DRBD8:
7045
    if len(secondary_nodes) != 1:
7046
      raise errors.ProgrammerError("Wrong template configuration")
7047
    remote_node = secondary_nodes[0]
7048
    minors = lu.cfg.AllocateDRBDMinor(
7049
      [primary_node, remote_node] * len(disk_info), instance_name)
7050

    
7051
    names = []
7052
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7053
                                               for i in range(disk_count)]):
7054
      names.append(lv_prefix + "_data")
7055
      names.append(lv_prefix + "_meta")
7056
    for idx, disk in enumerate(disk_info):
7057
      disk_index = idx + base_index
7058
      vg = disk.get(constants.IDISK_VG, vgname)
7059
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7060
                                      disk[constants.IDISK_SIZE], vg,
7061
                                      names[idx * 2:idx * 2 + 2],
7062
                                      "disk/%d" % disk_index,
7063
                                      minors[idx * 2], minors[idx * 2 + 1])
7064
      disk_dev.mode = disk[constants.IDISK_MODE]
7065
      disks.append(disk_dev)
7066
  elif template_name == constants.DT_FILE:
7067
    if len(secondary_nodes) != 0:
7068
      raise errors.ProgrammerError("Wrong template configuration")
7069

    
7070
    opcodes.RequireFileStorage()
7071

    
7072
    for idx, disk in enumerate(disk_info):
7073
      disk_index = idx + base_index
7074
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7075
                              size=disk[constants.IDISK_SIZE],
7076
                              iv_name="disk/%d" % disk_index,
7077
                              logical_id=(file_driver,
7078
                                          "%s/disk%d" % (file_storage_dir,
7079
                                                         disk_index)),
7080
                              mode=disk[constants.IDISK_MODE])
7081
      disks.append(disk_dev)
7082
  elif template_name == constants.DT_SHARED_FILE:
7083
    if len(secondary_nodes) != 0:
7084
      raise errors.ProgrammerError("Wrong template configuration")
7085

    
7086
    opcodes.RequireSharedFileStorage()
7087

    
7088
    for idx, disk in enumerate(disk_info):
7089
      disk_index = idx + base_index
7090
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7091
                              size=disk[constants.IDISK_SIZE],
7092
                              iv_name="disk/%d" % disk_index,
7093
                              logical_id=(file_driver,
7094
                                          "%s/disk%d" % (file_storage_dir,
7095
                                                         disk_index)),
7096
                              mode=disk[constants.IDISK_MODE])
7097
      disks.append(disk_dev)
7098
  elif template_name == constants.DT_BLOCK:
7099
    if len(secondary_nodes) != 0:
7100
      raise errors.ProgrammerError("Wrong template configuration")
7101

    
7102
    for idx, disk in enumerate(disk_info):
7103
      disk_index = idx + base_index
7104
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7105
                              size=disk[constants.IDISK_SIZE],
7106
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7107
                                          disk[constants.IDISK_ADOPT]),
7108
                              iv_name="disk/%d" % disk_index,
7109
                              mode=disk[constants.IDISK_MODE])
7110
      disks.append(disk_dev)
7111

    
7112
  else:
7113
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7114
  return disks
7115
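# Sketch of the returned structure (values are illustrative): for
# template_name=constants.DT_PLAIN, base_index=0 and two disks of 1024 and
# 2048 MiB, the function returns two objects.Disk instances roughly like
#   Disk(dev_type=LD_LV, size=1024, logical_id=("xenvg", "<uuid>.disk0"),
#        iv_name="disk/0", mode="rw")
#   Disk(dev_type=LD_LV, size=2048, logical_id=("xenvg", "<uuid>.disk1"),
#        iv_name="disk/1", mode="rw")
# where "xenvg" stands for the cluster-wide (or per-disk) volume group.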

    
7116

    
7117
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
7122

    
7123

    
7124
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
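# Worked example: _CalcEta(30.0, 256, 1024) first computes an average of
# 30.0 / 256 ~ 0.117 seconds per unit written and then estimates
# (1024 - 256) * 0.117 ~ 90 seconds remaining.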
7135

    
7136

    
7137
def _WipeDisks(lu, instance):
7138
  """Wipes instance disks.
7139

7140
  @type lu: L{LogicalUnit}
7141
  @param lu: the logical unit on whose behalf we execute
7142
  @type instance: L{objects.Instance}
7143
  @param instance: the instance whose disks we should wipe
7144
  @return: the success of the wipe
7145

7146
  """
7147
  node = instance.primary_node
7148

    
7149
  for device in instance.disks:
7150
    lu.cfg.SetDiskID(device, node)
7151

    
7152
  logging.info("Pause sync of instance %s disks", instance.name)
7153
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7154

    
7155
  for idx, success in enumerate(result.payload):
7156
    if not success:
7157
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)
7159

    
7160
  try:
7161
    for idx, device in enumerate(instance.disks):
7162
      lu.LogInfo("* Wiping disk %d", idx)
7163
      logging.info("Wiping disk %d for instance %s, node %s",
7164
                   idx, instance.name, node)
7165

    
7166
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7167
      # MAX_WIPE_CHUNK at max
7168
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7169
                            constants.MIN_WIPE_CHUNK_PERCENT)
7170

    
7171
      offset = 0
7172
      size = device.size
7173
      last_output = 0
7174
      start_time = time.time()
7175

    
7176
      while offset < size:
7177
        wipe_size = min(wipe_chunk_size, size - offset)
7178
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7179
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7180
                     (idx, offset, wipe_size))
7181
        now = time.time()
7182
        offset += wipe_size
7183
        if now - last_output >= 60:
7184
          eta = _CalcEta(now - start_time, offset, size)
7185
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7186
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7187
          last_output = now
7188
  finally:
7189
    logging.info("Resume sync of instance %s disks", instance.name)
7190

    
7191
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7192

    
7193
    for idx, success in enumerate(result.payload):
7194
      if not success:
7195
        lu.LogWarning("Resume sync of disk %d failed; please have a look at"
                      " the status and troubleshoot the issue.", idx)
7197
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)
7199
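# Chunking note for _WipeDisks above: each disk is wiped in chunks of
#   min(MAX_WIPE_CHUNK, disk_size / 100.0 * MIN_WIPE_CHUNK_PERCENT)
# so, with purely illustrative values MAX_WIPE_CHUNK = 1 GiB and
# MIN_WIPE_CHUNK_PERCENT = 10, a 100 GiB disk would be wiped in one hundred
# 1 GiB blockdev_wipe calls, with a progress/ETA line logged at most once
# per minute.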

    
7200

    
7201
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7202
  """Create all disks for an instance.
7203

7204
  This abstracts away some work from AddInstance.
7205

7206
  @type lu: L{LogicalUnit}
7207
  @param lu: the logical unit on whose behalf we execute
7208
  @type instance: L{objects.Instance}
7209
  @param instance: the instance whose disks we should create
7210
  @type to_skip: list
7211
  @param to_skip: list of indices to skip
7212
  @type target_node: string
7213
  @param target_node: if passed, overrides the target node for creation
7214
  @rtype: boolean
7215
  @return: the success of the creation
7216

7217
  """
7218
  info = _GetInstanceInfoText(instance)
7219
  if target_node is None:
7220
    pnode = instance.primary_node
7221
    all_nodes = instance.all_nodes
7222
  else:
7223
    pnode = target_node
7224
    all_nodes = [pnode]
7225

    
7226
  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7227
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7228
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7229

    
7230
    result.Raise("Failed to create directory '%s' on"
7231
                 " node %s" % (file_storage_dir, pnode))
7232

    
7233
  # Note: this needs to be kept in sync with adding of disks in
7234
  # LUInstanceSetParams
7235
  for idx, device in enumerate(instance.disks):
7236
    if to_skip and idx in to_skip:
7237
      continue
7238
    logging.info("Creating volume %s for instance %s",
7239
                 device.iv_name, instance.name)
7240
    #HARDCODE
7241
    for node in all_nodes:
7242
      f_create = node == pnode
7243
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7244
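# In the loop above f_create is True only on the primary node and is passed
# both as force_create and as force_open, i.e. devices are only force-opened
# on the node where the instance will actually run; on secondary nodes the
# CreateOnSecondary() logic of _CreateBlockDev decides what gets created.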

    
7245

    
7246
def _RemoveDisks(lu, instance, target_node=None):
7247
  """Remove all disks for an instance.
7248

7249
  This abstracts away some work from `AddInstance()` and
7250
  `RemoveInstance()`. Note that in case some of the devices couldn't
7251
  be removed, the removal will continue with the other ones (compare
7252
  with `_CreateDisks()`).
7253

7254
  @type lu: L{LogicalUnit}
7255
  @param lu: the logical unit on whose behalf we execute
7256
  @type instance: L{objects.Instance}
7257
  @param instance: the instance whose disks we should remove
7258
  @type target_node: string
7259
  @param target_node: used to override the node on which to remove the disks
7260
  @rtype: boolean
7261
  @return: the success of the removal
7262

7263
  """
7264
  logging.info("Removing block devices for instance %s", instance.name)
7265

    
7266
  all_result = True
7267
  for device in instance.disks:
7268
    if target_node:
7269
      edata = [(target_node, device)]
7270
    else:
7271
      edata = device.ComputeNodeTree(instance.primary_node)
7272
    for node, disk in edata:
7273
      lu.cfg.SetDiskID(disk, node)
7274
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7275
      if msg:
7276
        lu.LogWarning("Could not remove block device %s on node %s,"
7277
                      " continuing anyway: %s", device.iv_name, node, msg)
7278
        all_result = False
7279

    
7280
  if instance.disk_template == constants.DT_FILE:
7281
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7282
    if target_node:
7283
      tgt = target_node
7284
    else:
7285
      tgt = instance.primary_node
7286
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7287
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
7290
      all_result = False
7291

    
7292
  return all_result
7293

    
7294

    
7295
def _ComputeDiskSizePerVG(disk_template, disks):
7296
  """Compute disk size requirements in the volume group
7297

7298
  """
7299
  def _compute(disks, payload):
7300
    """Universal algorithm.
7301

7302
    """
7303
    vgs = {}
7304
    for disk in disks:
7305
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
7307

    
7308
    return vgs
7309

    
7310
  # Required free disk space as a function of disk and swap space
7311
  req_size_dict = {
7312
    constants.DT_DISKLESS: {},
7313
    constants.DT_PLAIN: _compute(disks, 0),
7314
    # 128 MB are added for drbd metadata for each disk
7315
    constants.DT_DRBD8: _compute(disks, 128),
7316
    constants.DT_FILE: {},
7317
    constants.DT_SHARED_FILE: {},
7318
  }
7319

    
7320
  if disk_template not in req_size_dict:
7321
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7322
                                 " is unknown" %  disk_template)
7323

    
7324
  return req_size_dict[disk_template]
7325
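# Example: for disk_template=constants.DT_DRBD8 and
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048}]
# the result is {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328},
# i.e. the per-VG totals include the 128 MiB of DRBD metadata per disk.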

    
7326

    
7327
def _ComputeDiskSize(disk_template, disks):
7328
  """Compute disk size requirements in the volume group
7329

7330
  """
7331
  # Required free disk space as a function of disk and swap space
7332
  req_size_dict = {
7333
    constants.DT_DISKLESS: None,
7334
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7335
    # 128 MB are added for drbd metadata for each disk
7336
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7337
    constants.DT_FILE: None,
7338
    constants.DT_SHARED_FILE: 0,
7339
    constants.DT_BLOCK: 0,
7340
  }
7341

    
7342
  if disk_template not in req_size_dict:
7343
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7344
                                 " is unknown" %  disk_template)
7345

    
7346
  return req_size_dict[disk_template]
7347
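# Unlike the per-VG variant above, this helper returns a single aggregate:
# with the same two DRBD8 disks of 1024 and 2048 MiB it yields 3328, while
# DT_FILE and DT_DISKLESS map to None because no LVM space is needed at all.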

    
7348

    
7349
def _FilterVmNodes(lu, nodenames):
7350
  """Filters out non-vm_capable nodes from a list.
7351

7352
  @type lu: L{LogicalUnit}
7353
  @param lu: the logical unit for which we check
7354
  @type nodenames: list
7355
  @param nodenames: the list of nodes on which we should check
7356
  @rtype: list
7357
  @return: the list of vm-capable nodes
7358

7359
  """
7360
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
7362

    
7363

    
7364
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7365
  """Hypervisor parameter validation.
7366

7367
  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.
7369

7370
  @type lu: L{LogicalUnit}
7371
  @param lu: the logical unit for which we check
7372
  @type nodenames: list
7373
  @param nodenames: the list of nodes on which we should check
7374
  @type hvname: string
7375
  @param hvname: the name of the hypervisor we should use
7376
  @type hvparams: dict
7377
  @param hvparams: the parameters which we need to check
7378
  @raise errors.OpPrereqError: if the parameters are not valid
7379

7380
  """
7381
  nodenames = _FilterVmNodes(lu, nodenames)
7382
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7383
                                                  hvname,
7384
                                                  hvparams)
7385
  for node in nodenames:
7386
    info = hvinfo[node]
7387
    if info.offline:
7388
      continue
7389
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7390

    
7391

    
7392
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7393
  """OS parameters validation.
7394

7395
  @type lu: L{LogicalUnit}
7396
  @param lu: the logical unit for which we check
7397
  @type required: boolean
7398
  @param required: whether the validation should fail if the OS is not
7399
      found
7400
  @type nodenames: list
7401
  @param nodenames: the list of nodes on which we should check
7402
  @type osname: string
  @param osname: the name of the OS we should use
7404
  @type osparams: dict
7405
  @param osparams: the parameters which we need to check
7406
  @raise errors.OpPrereqError: if the parameters are not valid
7407

7408
  """
7409
  nodenames = _FilterVmNodes(lu, nodenames)
7410
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7411
                                   [constants.OS_VALIDATE_PARAMETERS],
7412
                                   osparams)
7413
  for node, nres in result.items():
7414
    # we don't check for offline cases since this should be run only
7415
    # against the master node and/or an instance's nodes
7416
    nres.Raise("OS Parameters validation failed on node %s" % node)
7417
    if not nres.payload:
7418
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7419
                 osname, node)
7420
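# Both validation helpers above follow the usual multi-node RPC pattern of
# this module: the call returns a mapping from node name to a per-node RPC
# result, result.Raise(...) turns transport or remote failures into an
# exception (an OpExecError unless told otherwise), and result.payload
# carries the node-specific answer.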

    
7421

    
7422
class LUInstanceCreate(LogicalUnit):
7423
  """Create an instance.
7424

7425
  """
7426
  HPATH = "instance-add"
7427
  HTYPE = constants.HTYPE_INSTANCE
7428
  REQ_BGL = False
7429

    
7430
  def CheckArguments(self):
7431
    """Check arguments.
7432

7433
    """
7434
    # do not require name_check to ease forward/backward compatibility
7435
    # for tools
7436
    if self.op.no_install and self.op.start:
7437
      self.LogInfo("No-installation mode selected, disabling startup")
7438
      self.op.start = False
7439
    # validate/normalize the instance name
7440
    self.op.instance_name = \
7441
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7442

    
7443
    if self.op.ip_check and not self.op.name_check:
7444
      # TODO: make the ip check more flexible and not depend on the name check
7445
      raise errors.OpPrereqError("Cannot do ip check without a name check",
7446
                                 errors.ECODE_INVAL)
7447

    
7448
    # check nics' parameter names
7449
    for nic in self.op.nics:
7450
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7451

    
7452
    # check disks. parameter names and consistent adopt/no-adopt strategy
7453
    has_adopt = has_no_adopt = False
7454
    for disk in self.op.disks:
7455
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7456
      if constants.IDISK_ADOPT in disk:
7457
        has_adopt = True
7458
      else:
7459
        has_no_adopt = True
7460
    if has_adopt and has_no_adopt:
7461
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7462
                                 errors.ECODE_INVAL)
7463
    if has_adopt:
7464
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7465
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7466
                                   " '%s' disk template" %
7467
                                   self.op.disk_template,
7468
                                   errors.ECODE_INVAL)
7469
      if self.op.iallocator is not None:
7470
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7471
                                   " iallocator script", errors.ECODE_INVAL)
7472
      if self.op.mode == constants.INSTANCE_IMPORT:
7473
        raise errors.OpPrereqError("Disk adoption not allowed for"
7474
                                   " instance import", errors.ECODE_INVAL)
7475
    else:
7476
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7477
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7478
                                   " but no 'adopt' parameter given" %
7479
                                   self.op.disk_template,
7480
                                   errors.ECODE_INVAL)
7481

    
7482
    self.adopt_disks = has_adopt
7483

    
7484
    # instance name verification
7485
    if self.op.name_check:
7486
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7487
      self.op.instance_name = self.hostname1.name
7488
      # used in CheckPrereq for ip ping check
7489
      self.check_ip = self.hostname1.ip
7490
    else:
7491
      self.check_ip = None
7492

    
7493
    # file storage checks
7494
    if (self.op.file_driver and
7495
        not self.op.file_driver in constants.FILE_DRIVER):
7496
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7497
                                 self.op.file_driver, errors.ECODE_INVAL)
7498

    
7499
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7500
      raise errors.OpPrereqError("File storage directory path not absolute",
7501
                                 errors.ECODE_INVAL)
7502

    
7503
    ### Node/iallocator related checks
7504
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7505

    
7506
    if self.op.pnode is not None:
7507
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7508
        if self.op.snode is None:
7509
          raise errors.OpPrereqError("The networked disk templates need"
7510
                                     " a mirror node", errors.ECODE_INVAL)
7511
      elif self.op.snode:
7512
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7513
                        " template")
7514
        self.op.snode = None
7515

    
7516
    self._cds = _GetClusterDomainSecret()
7517

    
7518
    if self.op.mode == constants.INSTANCE_IMPORT:
7519
      # On import force_variant must be True, because if we forced it at
7520
      # initial install, our only chance when importing it back is that it
7521
      # works again!
7522
      self.op.force_variant = True
7523

    
7524
      if self.op.no_install:
7525
        self.LogInfo("No-installation mode has no effect during import")
7526

    
7527
    elif self.op.mode == constants.INSTANCE_CREATE:
7528
      if self.op.os_type is None:
7529
        raise errors.OpPrereqError("No guest OS specified",
7530
                                   errors.ECODE_INVAL)
7531
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7532
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7533
                                   " installation" % self.op.os_type,
7534
                                   errors.ECODE_STATE)
7535
      if self.op.disk_template is None:
7536
        raise errors.OpPrereqError("No disk template specified",
7537
                                   errors.ECODE_INVAL)
7538

    
7539
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7540
      # Check handshake to ensure both clusters have the same domain secret
7541
      src_handshake = self.op.source_handshake
7542
      if not src_handshake:
7543
        raise errors.OpPrereqError("Missing source handshake",
7544
                                   errors.ECODE_INVAL)
7545

    
7546
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7547
                                                           src_handshake)
7548
      if errmsg:
7549
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7550
                                   errors.ECODE_INVAL)
7551

    
7552
      # Load and check source CA
7553
      self.source_x509_ca_pem = self.op.source_x509_ca
7554
      if not self.source_x509_ca_pem:
7555
        raise errors.OpPrereqError("Missing source X509 CA",
7556
                                   errors.ECODE_INVAL)
7557

    
7558
      try:
7559
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7560
                                                    self._cds)
7561
      except OpenSSL.crypto.Error, err:
7562
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7563
                                   (err, ), errors.ECODE_INVAL)
7564

    
7565
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7566
      if errcode is not None:
7567
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7568
                                   errors.ECODE_INVAL)
7569

    
7570
      self.source_x509_ca = cert
7571

    
7572
      src_instance_name = self.op.source_instance_name
7573
      if not src_instance_name:
7574
        raise errors.OpPrereqError("Missing source instance name",
7575
                                   errors.ECODE_INVAL)
7576

    
7577
      self.source_instance_name = \
7578
          netutils.GetHostname(name=src_instance_name).name
7579

    
7580
    else:
7581
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7582
                                 self.op.mode, errors.ECODE_INVAL)
7583

    
7584
  def ExpandNames(self):
7585
    """ExpandNames for CreateInstance.
7586

7587
    Figure out the right locks for instance creation.
7588

7589
    """
7590
    self.needed_locks = {}
7591

    
7592
    instance_name = self.op.instance_name
7593
    # this is just a preventive check, but someone might still add this
7594
    # instance in the meantime, and creation will fail at lock-add time
7595
    if instance_name in self.cfg.GetInstanceList():
7596
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7597
                                 instance_name, errors.ECODE_EXISTS)
7598

    
7599
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7600

    
7601
    if self.op.iallocator:
7602
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7603
    else:
7604
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7605
      nodelist = [self.op.pnode]
7606
      if self.op.snode is not None:
7607
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7608
        nodelist.append(self.op.snode)
7609
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7610

    
7611
    # in case of import lock the source node too
7612
    if self.op.mode == constants.INSTANCE_IMPORT:
7613
      src_node = self.op.src_node
7614
      src_path = self.op.src_path
7615

    
7616
      if src_path is None:
7617
        self.op.src_path = src_path = self.op.instance_name
7618

    
7619
      if src_node is None:
7620
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7621
        self.op.src_node = None
7622
        if os.path.isabs(src_path):
7623
          raise errors.OpPrereqError("Importing an instance from an absolute"
7624
                                     " path requires a source node option.",
7625
                                     errors.ECODE_INVAL)
7626
      else:
7627
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7628
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7629
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7630
        if not os.path.isabs(src_path):
7631
          self.op.src_path = src_path = \
7632
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7633

    
7634
  def _RunAllocator(self):
7635
    """Run the allocator based on input opcode.
7636

7637
    """
7638
    nics = [n.ToDict() for n in self.nics]
7639
    ial = IAllocator(self.cfg, self.rpc,
7640
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7641
                     name=self.op.instance_name,
7642
                     disk_template=self.op.disk_template,
7643
                     tags=[],
7644
                     os=self.op.os_type,
7645
                     vcpus=self.be_full[constants.BE_VCPUS],
7646
                     mem_size=self.be_full[constants.BE_MEMORY],
7647
                     disks=self.disks,
7648
                     nics=nics,
7649
                     hypervisor=self.op.hypervisor,
7650
                     )
7651

    
7652
    ial.Run(self.op.iallocator)
7653

    
7654
    if not ial.success:
7655
      raise errors.OpPrereqError("Can't compute nodes using"
7656
                                 " iallocator '%s': %s" %
7657
                                 (self.op.iallocator, ial.info),
7658
                                 errors.ECODE_NORES)
7659
    if len(ial.result) != ial.required_nodes:
7660
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7661
                                 " of nodes (%s), required %s" %
7662
                                 (self.op.iallocator, len(ial.result),
7663
                                  ial.required_nodes), errors.ECODE_FAULT)
7664
    self.op.pnode = ial.result[0]
7665
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7666
                 self.op.instance_name, self.op.iallocator,
7667
                 utils.CommaJoin(ial.result))
7668
    if ial.required_nodes == 2:
7669
      self.op.snode = ial.result[1]
7670
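  # Descriptive note: on success ial.result is the list of chosen node names,
  # primary node first; a second entry is only present (and then used as the
  # secondary node) when ial.required_nodes == 2, e.g. for DRBD8 instances.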

    
7671
  def BuildHooksEnv(self):
7672
    """Build hooks env.
7673

7674
    This runs on master, primary and secondary nodes of the instance.
7675

7676
    """
7677
    env = {
7678
      "ADD_MODE": self.op.mode,
7679
      }
7680
    if self.op.mode == constants.INSTANCE_IMPORT:
7681
      env["SRC_NODE"] = self.op.src_node
7682
      env["SRC_PATH"] = self.op.src_path
7683
      env["SRC_IMAGES"] = self.src_images
7684

    
7685
    env.update(_BuildInstanceHookEnv(
7686
      name=self.op.instance_name,
7687
      primary_node=self.op.pnode,
7688
      secondary_nodes=self.secondaries,
7689
      status=self.op.start,
7690
      os_type=self.op.os_type,
7691
      memory=self.be_full[constants.BE_MEMORY],
7692
      vcpus=self.be_full[constants.BE_VCPUS],
7693
      nics=_NICListToTuple(self, self.nics),
7694
      disk_template=self.op.disk_template,
7695
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7696
             for d in self.disks],
7697
      bep=self.be_full,
7698
      hvp=self.hv_full,
7699
      hypervisor_name=self.op.hypervisor,
7700
    ))
7701

    
7702
    return env
7703

    
7704
  def BuildHooksNodes(self):
7705
    """Build hooks nodes.
7706

7707
    """
7708
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7709
    return nl, nl
7710

    
7711
  def _ReadExportInfo(self):
7712
    """Reads the export information from disk.
7713

7714
    It will override the opcode source node and path with the actual
7715
    information, if these two were not specified before.
7716

7717
    @return: the export information
7718

7719
    """
7720
    assert self.op.mode == constants.INSTANCE_IMPORT
7721

    
7722
    src_node = self.op.src_node
7723
    src_path = self.op.src_path
7724

    
7725
    if src_node is None:
7726
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7727
      exp_list = self.rpc.call_export_list(locked_nodes)
7728
      found = False
7729
      for node in exp_list:
7730
        if exp_list[node].fail_msg:
7731
          continue
7732
        if src_path in exp_list[node].payload:
7733
          found = True
7734
          self.op.src_node = src_node = node
7735
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7736
                                                       src_path)
7737
          break
7738
      if not found:
7739
        raise errors.OpPrereqError("No export found for relative path %s" %
7740
                                    src_path, errors.ECODE_INVAL)
7741

    
7742
    _CheckNodeOnline(self, src_node)
7743
    result = self.rpc.call_export_info(src_node, src_path)
7744
    result.Raise("No export or invalid export found in dir %s" % src_path)
7745

    
7746
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7747
    if not export_info.has_section(constants.INISECT_EXP):
7748
      raise errors.ProgrammerError("Corrupted export config",
7749
                                   errors.ECODE_ENVIRON)
7750

    
7751
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7752
    if int(ei_version) != constants.EXPORT_VERSION:
7753
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7754
                                 (ei_version, constants.EXPORT_VERSION),
7755
                                 errors.ECODE_ENVIRON)
7756
    return export_info
7757

    
7758
  def _ReadExportParams(self, einfo):
7759
    """Use export parameters as defaults.
7760

7761
    If the opcode doesn't specify (i.e. override) some instance
    parameters, try to take them from the export information, if
    that declares them.
7764

7765
    """
7766
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7767

    
7768
    if self.op.disk_template is None:
7769
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7770
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7771
                                          "disk_template")
7772
      else:
7773
        raise errors.OpPrereqError("No disk template specified and the export"
7774
                                   " is missing the disk_template information",
7775
                                   errors.ECODE_INVAL)
7776

    
7777
    if not self.op.disks:
7778
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7779
        disks = []
7780
        # TODO: import the disk iv_name too
7781
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7782
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7783
          disks.append({constants.IDISK_SIZE: disk_sz})
7784
        self.op.disks = disks
7785
      else:
7786
        raise errors.OpPrereqError("No disk info specified and the export"
7787
                                   " is missing the disk information",
7788
                                   errors.ECODE_INVAL)
7789

    
7790
    if (not self.op.nics and
7791
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7792
      nics = []
7793
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7794
        ndict = {}
7795
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7796
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7797
          ndict[name] = v
7798
        nics.append(ndict)
7799
      self.op.nics = nics
7800

    
7801
    if (self.op.hypervisor is None and
7802
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7803
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7804
    if einfo.has_section(constants.INISECT_HYP):
7805
      # use the export parameters but do not override the ones
7806
      # specified by the user
7807
      for name, value in einfo.items(constants.INISECT_HYP):
7808
        if name not in self.op.hvparams:
7809
          self.op.hvparams[name] = value
7810

    
7811
    if einfo.has_section(constants.INISECT_BEP):
7812
      # use the parameters, without overriding
7813
      for name, value in einfo.items(constants.INISECT_BEP):
7814
        if name not in self.op.beparams:
7815
          self.op.beparams[name] = value
7816
    else:
7817
      # try to read the parameters old style, from the main section
7818
      for name in constants.BES_PARAMETERS:
7819
        if (name not in self.op.beparams and
7820
            einfo.has_option(constants.INISECT_INS, name)):
7821
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7822

    
7823
    if einfo.has_section(constants.INISECT_OSP):
7824
      # use the parameters, without overriding
7825
      for name, value in einfo.items(constants.INISECT_OSP):
7826
        if name not in self.op.osparams:
7827
          self.op.osparams[name] = value
7828
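  # Precedence note for _ReadExportParams above: values given in the opcode
  # always win; the export file only fills in parameters the user did not
  # specify, and anything still unset later falls back to the cluster
  # defaults via the SimpleFill*() calls in CheckPrereq.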

    
7829
  def _RevertToDefaults(self, cluster):
7830
    """Revert the instance parameters to the default values.
7831

7832
    """
7833
    # hvparams
7834
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7835
    for name in self.op.hvparams.keys():
7836
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7837
        del self.op.hvparams[name]
7838
    # beparams
7839
    be_defs = cluster.SimpleFillBE({})
7840
    for name in self.op.beparams.keys():
7841
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7842
        del self.op.beparams[name]
7843
    # nic params
7844
    nic_defs = cluster.SimpleFillNIC({})
7845
    for nic in self.op.nics:
7846
      for name in constants.NICS_PARAMETERS:
7847
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7848
          del nic[name]
7849
    # osparams
7850
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7851
    for name in self.op.osparams.keys():
7852
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7853
        del self.op.osparams[name]
7854

    
7855
  def CheckPrereq(self):
7856
    """Check prerequisites.
7857

7858
    """
7859
    if self.op.mode == constants.INSTANCE_IMPORT:
7860
      export_info = self._ReadExportInfo()
7861
      self._ReadExportParams(export_info)
7862

    
7863
    if (not self.cfg.GetVGName() and
7864
        self.op.disk_template not in constants.DTS_NOT_LVM):
7865
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7866
                                 " instances", errors.ECODE_STATE)
7867

    
7868
    if self.op.hypervisor is None:
7869
      self.op.hypervisor = self.cfg.GetHypervisorType()
7870

    
7871
    cluster = self.cfg.GetClusterInfo()
7872
    enabled_hvs = cluster.enabled_hypervisors
7873
    if self.op.hypervisor not in enabled_hvs:
7874
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7875
                                 " cluster (%s)" % (self.op.hypervisor,
7876
                                  ",".join(enabled_hvs)),
7877
                                 errors.ECODE_STATE)
7878

    
7879
    # check hypervisor parameter syntax (locally)
7880
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7881
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7882
                                      self.op.hvparams)
7883
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7884
    hv_type.CheckParameterSyntax(filled_hvp)
7885
    self.hv_full = filled_hvp
7886
    # check that we don't specify global parameters on an instance
7887
    _CheckGlobalHvParams(self.op.hvparams)
7888

    
7889
    # fill and remember the beparams dict
7890
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7891
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7892

    
7893
    # build os parameters
7894
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7895

    
7896
    # now that hvp/bep are in final format, let's reset to defaults,
7897
    # if told to do so
7898
    if self.op.identify_defaults:
7899
      self._RevertToDefaults(cluster)
7900

    
7901
    # NIC buildup
7902
    self.nics = []
7903
    for idx, nic in enumerate(self.op.nics):
7904
      nic_mode_req = nic.get(constants.INIC_MODE, None)
7905
      nic_mode = nic_mode_req
7906
      if nic_mode is None:
7907
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7908

    
7909
      # in routed mode, for the first nic, the default ip is 'auto'
7910
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7911
        default_ip_mode = constants.VALUE_AUTO
7912
      else:
7913
        default_ip_mode = constants.VALUE_NONE
7914

    
7915
      # ip validity checks
7916
      ip = nic.get(constants.INIC_IP, default_ip_mode)
7917
      if ip is None or ip.lower() == constants.VALUE_NONE:
7918
        nic_ip = None
7919
      elif ip.lower() == constants.VALUE_AUTO:
7920
        if not self.op.name_check:
7921
          raise errors.OpPrereqError("IP address set to auto but name checks"
7922
                                     " have been skipped",
7923
                                     errors.ECODE_INVAL)
7924
        nic_ip = self.hostname1.ip
7925
      else:
7926
        if not netutils.IPAddress.IsValid(ip):
7927
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7928
                                     errors.ECODE_INVAL)
7929
        nic_ip = ip
7930

    
7931
      # TODO: check the ip address for uniqueness
7932
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7933
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7934
                                   errors.ECODE_INVAL)
7935

    
7936
      # MAC address verification
7937
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7938
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7939
        mac = utils.NormalizeAndValidateMac(mac)
7940

    
7941
        try:
7942
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7943
        except errors.ReservationError:
7944
          raise errors.OpPrereqError("MAC address %s already in use"
7945
                                     " in cluster" % mac,
7946
                                     errors.ECODE_NOTUNIQUE)
7947

    
7948
      #  Build nic parameters
7949
      link = nic.get(constants.INIC_LINK, None)
7950
      nicparams = {}
7951
      if nic_mode_req:
7952
        nicparams[constants.NIC_MODE] = nic_mode_req
7953
      if link:
7954
        nicparams[constants.NIC_LINK] = link
7955

    
7956
      check_params = cluster.SimpleFillNIC(nicparams)
7957
      objects.NIC.CheckParameterSyntax(check_params)
7958
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7959

    
7960
    # disk checks/pre-build
7961
    default_vg = self.cfg.GetVGName()
7962
    self.disks = []
7963
    for disk in self.op.disks:
7964
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7965
      if mode not in constants.DISK_ACCESS_SET:
7966
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7967
                                   mode, errors.ECODE_INVAL)
7968
      size = disk.get(constants.IDISK_SIZE, None)
7969
      if size is None:
7970
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7971
      try:
7972
        size = int(size)
7973
      except (TypeError, ValueError):
7974
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7975
                                   errors.ECODE_INVAL)
7976
      new_disk = {
7977
        constants.IDISK_SIZE: size,
7978
        constants.IDISK_MODE: mode,
7979
        constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
7980
        }
7981
      if constants.IDISK_ADOPT in disk:
7982
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7983
      self.disks.append(new_disk)
7984

    
7985
    if self.op.mode == constants.INSTANCE_IMPORT:
7986

    
7987
      # Check that the new instance doesn't have fewer disks than the export
7988
      instance_disks = len(self.disks)
7989
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7990
      if instance_disks < export_disks:
7991
        raise errors.OpPrereqError("Not enough disks to import."
7992
                                   " (instance: %d, export: %d)" %
7993
                                   (instance_disks, export_disks),
7994
                                   errors.ECODE_INVAL)
7995

    
7996
      disk_images = []
7997
      for idx in range(export_disks):
7998
        option = 'disk%d_dump' % idx
7999
        if export_info.has_option(constants.INISECT_INS, option):
8000
          # FIXME: are the old os-es, disk sizes, etc. useful?
8001
          export_name = export_info.get(constants.INISECT_INS, option)
8002
          image = utils.PathJoin(self.op.src_path, export_name)
8003
          disk_images.append(image)
8004
        else:
8005
          disk_images.append(False)
8006

    
8007
      self.src_images = disk_images
8008

    
8009
      old_name = export_info.get(constants.INISECT_INS, 'name')
8010
      try:
8011
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8012
      except (TypeError, ValueError), err:
8013
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8014
                                   " an integer: %s" % str(err),
8015
                                   errors.ECODE_STATE)
8016
      if self.op.instance_name == old_name:
8017
        for idx, nic in enumerate(self.nics):
8018
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8019
            nic_mac_ini = 'nic%d_mac' % idx
8020
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8021

    
8022
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8023

    
8024
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8025
    if self.op.ip_check:
8026
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8027
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8028
                                   (self.check_ip, self.op.instance_name),
8029
                                   errors.ECODE_NOTUNIQUE)
8030

    
8031
    #### mac address generation
8032
    # By generating here the mac address both the allocator and the hooks get
8033
    # the real final mac address rather than the 'auto' or 'generate' value.
8034
    # There is a race condition between the generation and the instance object
8035
    # creation, which means that we know the mac is valid now, but we're not
8036
    # sure it will be when we actually add the instance. If things go bad
8037
    # adding the instance will abort because of a duplicate mac, and the
8038
    # creation job will fail.
8039
    for nic in self.nics:
8040
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8041
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8042

    
8043
    #### allocator run
8044

    
8045
    if self.op.iallocator is not None:
8046
      self._RunAllocator()
8047

    
8048
    #### node related checks
8049

    
8050
    # check primary node
8051
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8052
    assert self.pnode is not None, \
8053
      "Cannot retrieve locked node %s" % self.op.pnode
8054
    if pnode.offline:
8055
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8056
                                 pnode.name, errors.ECODE_STATE)
8057
    if pnode.drained:
8058
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8059
                                 pnode.name, errors.ECODE_STATE)
8060
    if not pnode.vm_capable:
8061
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8062
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8063

    
8064
    self.secondaries = []
8065

    
8066
    # mirror node verification
8067
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8068
      if self.op.snode == pnode.name:
8069
        raise errors.OpPrereqError("The secondary node cannot be the"
8070
                                   " primary node.", errors.ECODE_INVAL)
8071
      _CheckNodeOnline(self, self.op.snode)
8072
      _CheckNodeNotDrained(self, self.op.snode)
8073
      _CheckNodeVmCapable(self, self.op.snode)
8074
      self.secondaries.append(self.op.snode)
8075

    
8076
    nodenames = [pnode.name] + self.secondaries
8077

    
8078
    if not self.adopt_disks:
8079
      # Check lv size requirements, if not adopting
8080
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8081
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8082

    
8083
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8084
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8085
                                disk[constants.IDISK_ADOPT])
8086
                     for disk in self.disks])
8087
      if len(all_lvs) != len(self.disks):
8088
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8089
                                   errors.ECODE_INVAL)
8090
      for lv_name in all_lvs:
8091
        try:
8092
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8093
          # to ReserveLV uses the same syntax
8094
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8095
        except errors.ReservationError:
8096
          raise errors.OpPrereqError("LV named %s used by another instance" %
8097
                                     lv_name, errors.ECODE_NOTUNIQUE)
8098

    
8099
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8100
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8101

    
8102
      node_lvs = self.rpc.call_lv_list([pnode.name],
8103
                                       vg_names.payload.keys())[pnode.name]
8104
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8105
      node_lvs = node_lvs.payload
8106

    
8107
      delta = all_lvs.difference(node_lvs.keys())
8108
      if delta:
8109
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8110
                                   utils.CommaJoin(delta),
8111
                                   errors.ECODE_INVAL)
8112
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8113
      if online_lvs:
8114
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8115
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8116
                                   errors.ECODE_STATE)
8117
      # update the size of disk based on what is found
8118
      for dsk in self.disks:
8119
        dsk[constants.IDISK_SIZE] = \
8120
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8121
                                        dsk[constants.IDISK_ADOPT])][0]))
8122

    
8123
    elif self.op.disk_template == constants.DT_BLOCK:
8124
      # Normalize and de-duplicate device paths
8125
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8126
                       for disk in self.disks])
8127
      if len(all_disks) != len(self.disks):
8128
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8129
                                   errors.ECODE_INVAL)
8130
      baddisks = [d for d in all_disks
8131
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8132
      if baddisks:
8133
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8134
                                   " cannot be adopted" %
8135
                                   (", ".join(baddisks),
8136
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8137
                                   errors.ECODE_INVAL)
8138

    
8139
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8140
                                            list(all_disks))[pnode.name]
8141
      node_disks.Raise("Cannot get block device information from node %s" %
8142
                       pnode.name)
8143
      node_disks = node_disks.payload
8144
      delta = all_disks.difference(node_disks.keys())
8145
      if delta:
8146
        raise errors.OpPrereqError("Missing block device(s): %s" %
8147
                                   utils.CommaJoin(delta),
8148
                                   errors.ECODE_INVAL)
8149
      for dsk in self.disks:
8150
        dsk[constants.IDISK_SIZE] = \
8151
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8152

    
8153
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8154

    
8155
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8156
    # check OS parameters (remotely)
8157
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8158

    
8159
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8160

    
8161
    # memory check on primary node
8162
    if self.op.start:
8163
      _CheckNodeFreeMemory(self, self.pnode.name,
8164
                           "creating instance %s" % self.op.instance_name,
8165
                           self.be_full[constants.BE_MEMORY],
8166
                           self.op.hypervisor)
8167

    
8168
    self.dry_run_result = list(nodenames)
8169

    
8170
  def Exec(self, feedback_fn):
8171
    """Create and add the instance to the cluster.
8172

8173
    """
8174
    instance = self.op.instance_name
8175
    pnode_name = self.pnode.name
8176

    
8177
    ht_kind = self.op.hypervisor
8178
    if ht_kind in constants.HTS_REQ_PORT:
8179
      network_port = self.cfg.AllocatePort()
8180
    else:
8181
      network_port = None
8182

    
8183
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8184
      # this is needed because os.path.join does not accept None arguments
8185
      if self.op.file_storage_dir is None:
8186
        string_file_storage_dir = ""
8187
      else:
8188
        string_file_storage_dir = self.op.file_storage_dir
8189

    
8190
      # build the full file storage dir path
8191
      if self.op.disk_template == constants.DT_SHARED_FILE:
8192
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8193
      else:
8194
        get_fsd_fn = self.cfg.GetFileStorageDir
8195

    
8196
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
8197
                                        string_file_storage_dir, instance)
8198
    else:
8199
      file_storage_dir = ""
8200

    
8201
    disks = _GenerateDiskTemplate(self,
8202
                                  self.op.disk_template,
8203
                                  instance, pnode_name,
8204
                                  self.secondaries,
8205
                                  self.disks,
8206
                                  file_storage_dir,
8207
                                  self.op.file_driver,
8208
                                  0,
8209
                                  feedback_fn)
8210

    
8211
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8212
                            primary_node=pnode_name,
8213
                            nics=self.nics, disks=disks,
8214
                            disk_template=self.op.disk_template,
8215
                            admin_up=False,
8216
                            network_port=network_port,
8217
                            beparams=self.op.beparams,
8218
                            hvparams=self.op.hvparams,
8219
                            hypervisor=self.op.hypervisor,
8220
                            osparams=self.op.osparams,
8221
                            )
8222

    
8223
    if self.adopt_disks:
8224
      if self.op.disk_template == constants.DT_PLAIN:
8225
        # rename LVs to the newly-generated names; we need to construct
8226
        # 'fake' LV disks with the old data, plus the new unique_id
8227
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8228
        rename_to = []
8229
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8230
          rename_to.append(t_dsk.logical_id)
8231
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8232
          self.cfg.SetDiskID(t_dsk, pnode_name)
8233
        result = self.rpc.call_blockdev_rename(pnode_name,
8234
                                               zip(tmp_disks, rename_to))
8235
        result.Raise("Failed to rename adopted LVs")
8236
    else:
8237
      feedback_fn("* creating instance disks...")
8238
      try:
8239
        _CreateDisks(self, iobj)
8240
      except errors.OpExecError:
8241
        self.LogWarning("Device creation failed, reverting...")
8242
        try:
8243
          _RemoveDisks(self, iobj)
8244
        finally:
8245
          self.cfg.ReleaseDRBDMinors(instance)
8246
          raise
8247

    
8248
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8249
        feedback_fn("* wiping instance disks...")
8250
        try:
8251
          _WipeDisks(self, iobj)
8252
        except errors.OpExecError:
8253
          self.LogWarning("Device wiping failed, reverting...")
8254
          try:
8255
            _RemoveDisks(self, iobj)
8256
          finally:
8257
            self.cfg.ReleaseDRBDMinors(instance)
8258
            raise
8259

    
8260
    feedback_fn("adding instance %s to cluster config" % instance)
8261

    
8262
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8263

    
8264
    # Declare that we don't want to remove the instance lock anymore, as we've
8265
    # added the instance to the config
8266
    del self.remove_locks[locking.LEVEL_INSTANCE]
8267
    # Unlock all the nodes
8268
    if self.op.mode == constants.INSTANCE_IMPORT:
8269
      nodes_keep = [self.op.src_node]
8270
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8271
                       if node != self.op.src_node]
8272
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8273
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8274
    else:
8275
      self.context.glm.release(locking.LEVEL_NODE)
8276
      del self.acquired_locks[locking.LEVEL_NODE]
8277

    
8278
    if self.op.wait_for_sync:
8279
      disk_abort = not _WaitForSync(self, iobj)
8280
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8281
      # make sure the disks are not degraded (still sync-ing is ok)
8282
      time.sleep(15)
8283
      feedback_fn("* checking mirrors status")
8284
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8285
    else:
8286
      disk_abort = False
8287

    
8288
    if disk_abort:
8289
      _RemoveDisks(self, iobj)
8290
      self.cfg.RemoveInstance(iobj.name)
8291
      # Make sure the instance lock gets removed
8292
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8293
      raise errors.OpExecError("There are some degraded disks for"
8294
                               " this instance")
8295

    
8296
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8297
      if self.op.mode == constants.INSTANCE_CREATE:
8298
        if not self.op.no_install:
8299
          feedback_fn("* running the instance OS create scripts...")
8300
          # FIXME: pass debug option from opcode to backend
8301
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8302
                                                 self.op.debug_level)
8303
          result.Raise("Could not add os for instance %s"
8304
                       " on node %s" % (instance, pnode_name))
8305

    
8306
      elif self.op.mode == constants.INSTANCE_IMPORT:
8307
        feedback_fn("* running the instance OS import scripts...")
8308

    
8309
        transfers = []
8310

    
8311
        for idx, image in enumerate(self.src_images):
8312
          if not image:
8313
            continue
8314

    
8315
          # FIXME: pass debug option from opcode to backend
8316
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8317
                                             constants.IEIO_FILE, (image, ),
8318
                                             constants.IEIO_SCRIPT,
8319
                                             (iobj.disks[idx], idx),
8320
                                             None)
8321
          transfers.append(dt)
8322

    
8323
        import_result = \
8324
          masterd.instance.TransferInstanceData(self, feedback_fn,
8325
                                                self.op.src_node, pnode_name,
8326
                                                self.pnode.secondary_ip,
8327
                                                iobj, transfers)
8328
        if not compat.all(import_result):
8329
          self.LogWarning("Some disks for instance %s on node %s were not"
8330
                          " imported successfully" % (instance, pnode_name))
8331

    
8332
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8333
        feedback_fn("* preparing remote import...")
8334
        # The source cluster will stop the instance before attempting to make a
8335
        # connection. In some cases stopping an instance can take a long time,
8336
        # hence the shutdown timeout is added to the connection timeout.
8337
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8338
                           self.op.source_shutdown_timeout)
8339
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
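        # E.g. (illustrative figures): with a 2-minute shutdown timeout the
        # two clusters get RIE_CONNECT_TIMEOUT + 120 seconds to establish the
        # import/export connection before giving up.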
8340

    
8341
        assert iobj.primary_node == self.pnode.name
8342
        disk_results = \
8343
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8344
                                        self.source_x509_ca,
8345
                                        self._cds, timeouts)
8346
        if not compat.all(disk_results):
8347
          # TODO: Should the instance still be started, even if some disks
8348
          # failed to import (valid for local imports, too)?
8349
          self.LogWarning("Some disks for instance %s on node %s were not"
8350
                          " imported successfully" % (instance, pnode_name))
8351

    
8352
        # Run rename script on newly imported instance
8353
        assert iobj.name == instance
8354
        feedback_fn("Running rename script for %s" % instance)
8355
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8356
                                                   self.source_instance_name,
8357
                                                   self.op.debug_level)
8358
        if result.fail_msg:
8359
          self.LogWarning("Failed to run rename script for %s on node"
8360
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8361

    
8362
      else:
8363
        # also checked in the prereq part
8364
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8365
                                     % self.op.mode)
8366

    
8367
    if self.op.start:
8368
      iobj.admin_up = True
8369
      self.cfg.Update(iobj, feedback_fn)
8370
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8371
      feedback_fn("* starting instance...")
8372
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8373
      result.Raise("Could not start instance")
8374

    
8375
    return list(iobj.all_nodes)
8376

    
8377

    
8378
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
8445

    
8446

    
8447
class LUInstanceReplaceDisks(LogicalUnit):
8448
  """Replace the disks of an instance.
8449

8450
  """
8451
  HPATH = "mirrors-replace"
8452
  HTYPE = constants.HTYPE_INSTANCE
8453
  REQ_BGL = False
8454

    
8455
  def CheckArguments(self):
8456
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8457
                                  self.op.iallocator)
8458

    
8459
  def ExpandNames(self):
8460
    self._ExpandAndLockInstance()
8461

    
8462
    if self.op.iallocator is not None:
8463
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8464

    
8465
    elif self.op.remote_node is not None:
8466
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8467
      self.op.remote_node = remote_node
8468

    
8469
      # Warning: do not remove the locking of the new secondary here
8470
      # unless DRBD8.AddChildren is changed to work in parallel;
8471
      # currently it doesn't since parallel invocations of
8472
      # FindUnusedMinor will conflict
8473
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8474
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8475

    
8476
    else:
8477
      self.needed_locks[locking.LEVEL_NODE] = []
8478
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
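    # Summary of the three cases above: with an iallocator the new secondary
    # is not known yet, so all node locks are taken; with an explicit remote
    # node that node is locked in addition to the instance's own nodes; in the
    # plain replace modes only the instance's primary/secondary nodes are
    # locked (both filled in later via DeclareLocks/_LockInstancesNodes).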
8479

    
8480
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8481
                                   self.op.iallocator, self.op.remote_node,
8482
                                   self.op.disks, False, self.op.early_release)
8483

    
8484
    self.tasklets = [self.replacer]
8485

    
8486
  def DeclareLocks(self, level):
8487
    # If we're not already locking all nodes in the set we have to declare the
8488
    # instance's primary/secondary nodes.
8489
    if (level == locking.LEVEL_NODE and
8490
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8491
      self._LockInstancesNodes()
8492

    
8493
  def BuildHooksEnv(self):
8494
    """Build hooks env.
8495

8496
    This runs on the master, the primary and all the secondaries.
8497

8498
    """
8499
    instance = self.replacer.instance
8500
    env = {
8501
      "MODE": self.op.mode,
8502
      "NEW_SECONDARY": self.op.remote_node,
8503
      "OLD_SECONDARY": instance.secondary_nodes[0],
8504
      }
8505
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8506
    return env
8507

    
8508
  def BuildHooksNodes(self):
8509
    """Build hooks nodes.
8510

8511
    """
8512
    instance = self.replacer.instance
8513
    nl = [
8514
      self.cfg.GetMasterNode(),
8515
      instance.primary_node,
8516
      ]
8517
    if self.op.remote_node is not None:
8518
      nl.append(self.op.remote_node)
8519
    return nl, nl
8520

    
8521

    
8522
class TLReplaceDisks(Tasklet):
8523
  """Replaces disks for an instance.
8524

8525
  Note: Locking is not within the scope of this class.
8526

8527
  """
8528
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8529
               disks, delay_iallocator, early_release):
8530
    """Initializes this class.
8531

8532
    """
8533
    Tasklet.__init__(self, lu)
8534

    
8535
    # Parameters
8536
    self.instance_name = instance_name
8537
    self.mode = mode
8538
    self.iallocator_name = iallocator_name
8539
    self.remote_node = remote_node
8540
    self.disks = disks
8541
    self.delay_iallocator = delay_iallocator
8542
    self.early_release = early_release
8543

    
8544
    # Runtime data
8545
    self.instance = None
8546
    self.new_node = None
8547
    self.target_node = None
8548
    self.other_node = None
8549
    self.remote_node_info = None
8550
    self.node_secondary_ip = None
8551

    
8552
  @staticmethod
8553
  def CheckArguments(mode, remote_node, iallocator):
8554
    """Helper function for users of this class.
8555

8556
    """
8557
    # check for valid parameter combination
8558
    if mode == constants.REPLACE_DISK_CHG:
8559
      if remote_node is None and iallocator is None:
8560
        raise errors.OpPrereqError("When changing the secondary either an"
8561
                                   " iallocator script must be used or the"
8562
                                   " new node given", errors.ECODE_INVAL)
8563

    
8564
      if remote_node is not None and iallocator is not None:
8565
        raise errors.OpPrereqError("Give either the iallocator or the new"
8566
                                   " secondary, not both", errors.ECODE_INVAL)
8567

    
8568
    elif remote_node is not None or iallocator is not None:
8569
      # Not replacing the secondary
8570
      raise errors.OpPrereqError("The iallocator and new node options can"
8571
                                 " only be used when changing the"
8572
                                 " secondary node", errors.ECODE_INVAL)
8573

    
8574
  @staticmethod
8575
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8576
    """Compute a new secondary node using an IAllocator.
8577

8578
    """
8579
    ial = IAllocator(lu.cfg, lu.rpc,
8580
                     mode=constants.IALLOCATOR_MODE_RELOC,
8581
                     name=instance_name,
8582
                     relocate_from=relocate_from)
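    # relocate_from is the instance's current secondary node list; since only
    # DRBD8 instances reach this point it contains exactly one node, and the
    # allocator is expected to return a single replacement.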
8583

    
8584
    ial.Run(iallocator_name)
8585

    
8586
    if not ial.success:
8587
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8588
                                 " %s" % (iallocator_name, ial.info),
8589
                                 errors.ECODE_NORES)
8590

    
8591
    if len(ial.result) != ial.required_nodes:
8592
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8593
                                 " of nodes (%s), required %s" %
8594
                                 (iallocator_name,
8595
                                  len(ial.result), ial.required_nodes),
8596
                                 errors.ECODE_FAULT)
8597

    
8598
    remote_node_name = ial.result[0]
8599

    
8600
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8601
               instance_name, remote_node_name)
8602

    
8603
    return remote_node_name
8604

    
8605
  def _FindFaultyDisks(self, node_name):
8606
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8607
                                    node_name, True)
8608

    
8609
  def _CheckDisksActivated(self, instance):
8610
    """Checks if the instance disks are activated.
8611

8612
    @param instance: The instance to check disks
8613
    @return: True if they are activated, False otherwise
8614

8615
    """
8616
    nodes = instance.all_nodes
8617

    
8618
    for idx, dev in enumerate(instance.disks):
8619
      for node in nodes:
8620
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8621
        self.cfg.SetDiskID(dev, node)
8622

    
8623
        result = self.rpc.call_blockdev_find(node, dev)
8624

    
8625
        if result.offline:
8626
          continue
8627
        elif result.fail_msg or not result.payload:
8628
          return False
8629

    
8630
    return True
8631

    
8632

    
8633
  def CheckPrereq(self):
8634
    """Check prerequisites.
8635

8636
    This checks that the instance is in the cluster.
8637

8638
    """
8639
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8640
    assert instance is not None, \
8641
      "Cannot retrieve locked instance %s" % self.instance_name
8642

    
8643
    if instance.disk_template != constants.DT_DRBD8:
8644
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8645
                                 " instances", errors.ECODE_INVAL)
8646

    
8647
    if len(instance.secondary_nodes) != 1:
8648
      raise errors.OpPrereqError("The instance has a strange layout,"
8649
                                 " expected one secondary but found %d" %
8650
                                 len(instance.secondary_nodes),
8651
                                 errors.ECODE_FAULT)
8652

    
8653
    if not self.delay_iallocator:
8654
      self._CheckPrereq2()
8655

    
8656
  def _CheckPrereq2(self):
8657
    """Check prerequisites, second part.
8658

8659
    This function should always be part of CheckPrereq. It was separated and is
8660
    now called from Exec because during node evacuation iallocator was only
8661
    called with an unmodified cluster model, not taking planned changes into
8662
    account.
8663

8664
    """
8665
    instance = self.instance
8666
    secondary_node = instance.secondary_nodes[0]
8667

    
8668
    if self.iallocator_name is None:
8669
      remote_node = self.remote_node
8670
    else:
8671
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8672
                                       instance.name, instance.secondary_nodes)
8673

    
8674
    if remote_node is not None:
8675
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8676
      assert self.remote_node_info is not None, \
8677
        "Cannot retrieve locked node %s" % remote_node
8678
    else:
8679
      self.remote_node_info = None
8680

    
8681
    if remote_node == self.instance.primary_node:
8682
      raise errors.OpPrereqError("The specified node is the primary node of"
8683
                                 " the instance.", errors.ECODE_INVAL)
8684

    
8685
    if remote_node == secondary_node:
8686
      raise errors.OpPrereqError("The specified node is already the"
8687
                                 " secondary node of the instance.",
8688
                                 errors.ECODE_INVAL)
8689

    
8690
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8691
                                    constants.REPLACE_DISK_CHG):
8692
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8693
                                 errors.ECODE_INVAL)
8694

    
8695
    if self.mode == constants.REPLACE_DISK_AUTO:
8696
      if not self._CheckDisksActivated(instance):
8697
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
8698
                                   " first" % self.instance_name,
8699
                                   errors.ECODE_STATE)
8700
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8701
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8702

    
8703
      if faulty_primary and faulty_secondary:
8704
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8705
                                   " one node and can not be repaired"
8706
                                   " automatically" % self.instance_name,
8707
                                   errors.ECODE_STATE)
8708

    
8709
      if faulty_primary:
8710
        self.disks = faulty_primary
8711
        self.target_node = instance.primary_node
8712
        self.other_node = secondary_node
8713
        check_nodes = [self.target_node, self.other_node]
8714
      elif faulty_secondary:
8715
        self.disks = faulty_secondary
8716
        self.target_node = secondary_node
8717
        self.other_node = instance.primary_node
8718
        check_nodes = [self.target_node, self.other_node]
8719
      else:
8720
        self.disks = []
8721
        check_nodes = []
8722

    
8723
    else:
8724
      # Non-automatic modes
8725
      if self.mode == constants.REPLACE_DISK_PRI:
8726
        self.target_node = instance.primary_node
8727
        self.other_node = secondary_node
8728
        check_nodes = [self.target_node, self.other_node]
8729

    
8730
      elif self.mode == constants.REPLACE_DISK_SEC:
8731
        self.target_node = secondary_node
8732
        self.other_node = instance.primary_node
8733
        check_nodes = [self.target_node, self.other_node]
8734

    
8735
      elif self.mode == constants.REPLACE_DISK_CHG:
8736
        self.new_node = remote_node
8737
        self.other_node = instance.primary_node
8738
        self.target_node = secondary_node
8739
        check_nodes = [self.new_node, self.other_node]
8740

    
8741
        _CheckNodeNotDrained(self.lu, remote_node)
8742
        _CheckNodeVmCapable(self.lu, remote_node)
8743

    
8744
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8745
        assert old_node_info is not None
8746
        if old_node_info.offline and not self.early_release:
8747
          # doesn't make sense to delay the release
8748
          self.early_release = True
8749
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8750
                          " early-release mode", secondary_node)
8751

    
8752
      else:
8753
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8754
                                     self.mode)
8755

    
8756
      # If not specified all disks should be replaced
8757
      if not self.disks:
8758
        self.disks = range(len(self.instance.disks))
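    # At this point the roles are settled:
    #   REPLACE_DISK_PRI:  target_node = primary,   other_node = secondary
    #   REPLACE_DISK_SEC:  target_node = secondary, other_node = primary
    #   REPLACE_DISK_CHG:  target_node = old secondary, other_node = primary,
    #                      new_node = the chosen replacement secondary
    #   REPLACE_DISK_AUTO: roles depend on which side reported faulty disks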
8759

    
8760
    for node in check_nodes:
8761
      _CheckNodeOnline(self.lu, node)
8762

    
8763
    # Check whether disks are valid
8764
    for disk_idx in self.disks:
8765
      instance.FindDisk(disk_idx)
8766

    
8767
    # Get secondary node IP addresses
8768
    node_2nd_ip = {}
8769

    
8770
    for node_name in [self.target_node, self.other_node, self.new_node]:
8771
      if node_name is not None:
8772
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8773

    
8774
    self.node_secondary_ip = node_2nd_ip
8775

    
8776
  def Exec(self, feedback_fn):
8777
    """Execute disk replacement.
8778

8779
    This dispatches the disk replacement to the appropriate handler.
8780

8781
    """
8782
    if self.delay_iallocator:
8783
      self._CheckPrereq2()
8784

    
8785
    if not self.disks:
8786
      feedback_fn("No disks need replacement")
8787
      return
8788

    
8789
    feedback_fn("Replacing disk(s) %s for %s" %
8790
                (utils.CommaJoin(self.disks), self.instance.name))
8791

    
8792
    activate_disks = (not self.instance.admin_up)
8793

    
8794
    # Activate the instance disks if we're replacing them on a down instance
8795
    if activate_disks:
8796
      _StartInstanceDisks(self.lu, self.instance, True)
8797

    
8798
    try:
8799
      # Should we replace the secondary node?
8800
      if self.new_node is not None:
8801
        fn = self._ExecDrbd8Secondary
8802
      else:
8803
        fn = self._ExecDrbd8DiskOnly
8804

    
8805
      return fn(feedback_fn)
8806

    
8807
    finally:
8808
      # Deactivate the instance disks if we're replacing them on a
8809
      # down instance
8810
      if activate_disks:
8811
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8812

    
8813
  def _CheckVolumeGroup(self, nodes):
8814
    self.lu.LogInfo("Checking volume groups")
8815

    
8816
    vgname = self.cfg.GetVGName()
8817

    
8818
    # Make sure volume group exists on all involved nodes
8819
    results = self.rpc.call_vg_list(nodes)
8820
    if not results:
8821
      raise errors.OpExecError("Can't list volume groups on the nodes")
8822

    
8823
    for node in nodes:
8824
      res = results[node]
8825
      res.Raise("Error checking node %s" % node)
8826
      if vgname not in res.payload:
8827
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8828
                                 (vgname, node))
8829

    
8830
  def _CheckDisksExistence(self, nodes):
8831
    # Check disk existence
8832
    for idx, dev in enumerate(self.instance.disks):
8833
      if idx not in self.disks:
8834
        continue
8835

    
8836
      for node in nodes:
8837
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8838
        self.cfg.SetDiskID(dev, node)
8839

    
8840
        result = self.rpc.call_blockdev_find(node, dev)
8841

    
8842
        msg = result.fail_msg
8843
        if msg or not result.payload:
8844
          if not msg:
8845
            msg = "disk not found"
8846
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8847
                                   (idx, node, msg))
8848

    
8849
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8850
    for idx, dev in enumerate(self.instance.disks):
8851
      if idx not in self.disks:
8852
        continue
8853

    
8854
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8855
                      (idx, node_name))
8856

    
8857
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8858
                                   ldisk=ldisk):
8859
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8860
                                 " replace disks for instance %s" %
8861
                                 (node_name, self.instance.name))
8862

    
8863
  def _CreateNewStorage(self, node_name):
8864
    vgname = self.cfg.GetVGName()
8865
    iv_names = {}
8866

    
8867
    for idx, dev in enumerate(self.instance.disks):
8868
      if idx not in self.disks:
8869
        continue
8870

    
8871
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8872

    
8873
      self.cfg.SetDiskID(dev, node_name)
8874

    
8875
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8876
      names = _GenerateUniqueNames(self.lu, lv_names)
8877

    
8878
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8879
                             logical_id=(vgname, names[0]))
8880
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8881
                             logical_id=(vgname, names[1]))
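      # DRBD8 keeps its metadata on a small separate LV next to the data LV;
      # the 128 MB used here matches the size used when the instance's DRBD
      # disks were first created.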
8882

    
8883
      new_lvs = [lv_data, lv_meta]
8884
      old_lvs = dev.children
8885
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8886

    
8887
      # we pass force_create=True to force the LVM creation
8888
      for new_lv in new_lvs:
8889
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8890
                        _GetInstanceInfoText(self.instance), False)
8891

    
8892
    return iv_names
8893

    
8894
  def _CheckDevices(self, node_name, iv_names):
8895
    for name, (dev, _, _) in iv_names.iteritems():
8896
      self.cfg.SetDiskID(dev, node_name)
8897

    
8898
      result = self.rpc.call_blockdev_find(node_name, dev)
8899

    
8900
      msg = result.fail_msg
8901
      if msg or not result.payload:
8902
        if not msg:
8903
          msg = "disk not found"
8904
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8905
                                 (name, msg))
8906

    
8907
      if result.payload.is_degraded:
8908
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8909

    
8910
  def _RemoveOldStorage(self, node_name, iv_names):
8911
    for name, (_, old_lvs, _) in iv_names.iteritems():
8912
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8913

    
8914
      for lv in old_lvs:
8915
        self.cfg.SetDiskID(lv, node_name)
8916

    
8917
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8918
        if msg:
8919
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8920
                             hint="remove unused LVs manually")
8921

    
8922
  def _ReleaseNodeLock(self, node_name):
8923
    """Releases the lock for a given node."""
8924
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8925

    
8926
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8927
    """Replace a disk on the primary or secondary for DRBD 8.
8928

8929
    The algorithm for replace is quite complicated:
8930

8931
      1. for each disk to be replaced:
8932

8933
        1. create new LVs on the target node with unique names
8934
        1. detach old LVs from the drbd device
8935
        1. rename old LVs to name_replaced.<time_t>
8936
        1. rename new LVs to old LVs
8937
        1. attach the new LVs (with the old names now) to the drbd device
8938

8939
      1. wait for sync across all devices
8940

8941
      1. for each modified disk:
8942

8943
        1. remove old LVs (which have the name name_replaced.<time_t>)
8944

8945
    Failures are not very well handled.
8946

8947
    """
8948
    steps_total = 6
8949

    
8950
    # Step: check device activation
8951
    self.lu.LogStep(1, steps_total, "Check device existence")
8952
    self._CheckDisksExistence([self.other_node, self.target_node])
8953
    self._CheckVolumeGroup([self.target_node, self.other_node])
8954

    
8955
    # Step: check other node consistency
8956
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8957
    self._CheckDisksConsistency(self.other_node,
8958
                                self.other_node == self.instance.primary_node,
8959
                                False)
8960

    
8961
    # Step: create new storage
8962
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8963
    iv_names = self._CreateNewStorage(self.target_node)
8964

    
8965
    # Step: for each lv, detach+rename*2+attach
8966
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8967
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8968
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8969

    
8970
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8971
                                                     old_lvs)
8972
      result.Raise("Can't detach drbd from local storage on node"
8973
                   " %s for device %s" % (self.target_node, dev.iv_name))
8974
      #dev.children = []
8975
      #cfg.Update(instance)
8976

    
8977
      # ok, we created the new LVs, so now we know we have the needed
8978
      # storage; as such, we proceed on the target node to rename
8979
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8980
      # using the assumption that logical_id == physical_id (which in
8981
      # turn is the unique_id on that node)
8982

    
8983
      # FIXME(iustin): use a better name for the replaced LVs
8984
      temp_suffix = int(time.time())
8985
      ren_fn = lambda d, suff: (d.physical_id[0],
8986
                                d.physical_id[1] + "_replaced-%s" % suff)
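      # E.g. (hypothetical names): an old data LV ("xenvg", "abc.disk0_data")
      # becomes ("xenvg", "abc.disk0_data_replaced-1359400000"), and the newly
      # created LV is then renamed to the original name, so the DRBD device
      # ends up with children carrying the same logical_ids as before.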
8987

    
8988
      # Build the rename list based on what LVs exist on the node
8989
      rename_old_to_new = []
8990
      for to_ren in old_lvs:
8991
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8992
        if not result.fail_msg and result.payload:
8993
          # device exists
8994
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8995

    
8996
      self.lu.LogInfo("Renaming the old LVs on the target node")
8997
      result = self.rpc.call_blockdev_rename(self.target_node,
8998
                                             rename_old_to_new)
8999
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9000

    
9001
      # Now we rename the new LVs to the old LVs
9002
      self.lu.LogInfo("Renaming the new LVs on the target node")
9003
      rename_new_to_old = [(new, old.physical_id)
9004
                           for old, new in zip(old_lvs, new_lvs)]
9005
      result = self.rpc.call_blockdev_rename(self.target_node,
9006
                                             rename_new_to_old)
9007
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9008

    
9009
      for old, new in zip(old_lvs, new_lvs):
9010
        new.logical_id = old.logical_id
9011
        self.cfg.SetDiskID(new, self.target_node)
9012

    
9013
      for disk in old_lvs:
9014
        disk.logical_id = ren_fn(disk, temp_suffix)
9015
        self.cfg.SetDiskID(disk, self.target_node)
9016

    
9017
      # Now that the new lvs have the old name, we can add them to the device
9018
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9019
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9020
                                                  new_lvs)
9021
      msg = result.fail_msg
9022
      if msg:
9023
        for new_lv in new_lvs:
9024
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9025
                                               new_lv).fail_msg
9026
          if msg2:
9027
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9028
                               hint=("cleanup manually the unused logical"
9029
                                     "volumes"))
9030
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9031

    
9032
      dev.children = new_lvs
9033

    
9034
      self.cfg.Update(self.instance, feedback_fn)
9035

    
9036
    cstep = 5
9037
    if self.early_release:
9038
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9039
      cstep += 1
9040
      self._RemoveOldStorage(self.target_node, iv_names)
9041
      # WARNING: we release both node locks here, do not do other RPCs
9042
      # than WaitForSync to the primary node
9043
      self._ReleaseNodeLock([self.target_node, self.other_node])
9044

    
9045
    # Wait for sync
9046
    # This can fail as the old devices are degraded and _WaitForSync
9047
    # does a combined result over all disks, so we don't check its return value
9048
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9049
    cstep += 1
9050
    _WaitForSync(self.lu, self.instance)
9051

    
9052
    # Check all devices manually
9053
    self._CheckDevices(self.instance.primary_node, iv_names)
9054

    
9055
    # Step: remove old storage
9056
    if not self.early_release:
9057
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9058
      cstep += 1
9059
      self._RemoveOldStorage(self.target_node, iv_names)
9060

    
9061
  def _ExecDrbd8Secondary(self, feedback_fn):
9062
    """Replace the secondary node for DRBD 8.
9063

9064
    The algorithm for replace is quite complicated:
9065
      - for all disks of the instance:
9066
        - create new LVs on the new node with same names
9067
        - shutdown the drbd device on the old secondary
9068
        - disconnect the drbd network on the primary
9069
        - create the drbd device on the new secondary
9070
        - network attach the drbd on the primary, using an artifice:
9071
          the drbd code for Attach() will connect to the network if it
9072
          finds a device which is connected to the good local disks but
9073
          not network enabled
9074
      - wait for sync across all devices
9075
      - remove all disks from the old secondary
9076

9077
    Failures are not very well handled.
9078

9079
    """
9080
    steps_total = 6
9081

    
9082
    # Step: check device activation
9083
    self.lu.LogStep(1, steps_total, "Check device existence")
9084
    self._CheckDisksExistence([self.instance.primary_node])
9085
    self._CheckVolumeGroup([self.instance.primary_node])
9086

    
9087
    # Step: check other node consistency
9088
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9089
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9090

    
9091
    # Step: create new storage
9092
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9093
    for idx, dev in enumerate(self.instance.disks):
9094
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9095
                      (self.new_node, idx))
9096
      # we pass force_create=True to force LVM creation
9097
      for new_lv in dev.children:
9098
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9099
                        _GetInstanceInfoText(self.instance), False)
9100

    
9101
    # Step 4: drbd minors and drbd setup changes
9102
    # after this, we must manually remove the drbd minors on both the
9103
    # error and the success paths
9104
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9105
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9106
                                         for dev in self.instance.disks],
9107
                                        self.instance.name)
9108
    logging.debug("Allocated minors %r", minors)
9109

    
9110
    iv_names = {}
9111
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9112
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9113
                      (self.new_node, idx))
9114
      # create new devices on new_node; note that we create two IDs:
9115
      # one without port, so the drbd will be activated without
9116
      # networking information on the new node at this stage, and one
9117
      # with network, for the latter activation in step 4
9118
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9119
      if self.instance.primary_node == o_node1:
9120
        p_minor = o_minor1
9121
      else:
9122
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9123
        p_minor = o_minor2
9124

    
9125
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9126
                      p_minor, new_minor, o_secret)
9127
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9128
                    p_minor, new_minor, o_secret)
9129

    
9130
      iv_names[idx] = (dev, dev.children, new_net_id)
9131
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9132
                    new_net_id)
9133
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9134
                              logical_id=new_alone_id,
9135
                              children=dev.children,
9136
                              size=dev.size)
9137
      try:
9138
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9139
                              _GetInstanceInfoText(self.instance), False)
9140
      except errors.GenericError:
9141
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9142
        raise
9143

    
9144
    # We have new devices, shutdown the drbd on the old secondary
9145
    for idx, dev in enumerate(self.instance.disks):
9146
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9147
      self.cfg.SetDiskID(dev, self.target_node)
9148
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9149
      if msg:
9150
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9151
                           "node: %s" % (idx, msg),
9152
                           hint=("Please cleanup this device manually as"
9153
                                 " soon as possible"))
9154

    
9155
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9156
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9157
                                               self.node_secondary_ip,
9158
                                               self.instance.disks)\
9159
                                              [self.instance.primary_node]
9160

    
9161
    msg = result.fail_msg
9162
    if msg:
9163
      # detaches didn't succeed (unlikely)
9164
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9165
      raise errors.OpExecError("Can't detach the disks from the network on"
9166
                               " old node: %s" % (msg,))
9167

    
9168
    # if we managed to detach at least one, we update all the disks of
9169
    # the instance to point to the new secondary
9170
    self.lu.LogInfo("Updating instance configuration")
9171
    for dev, _, new_logical_id in iv_names.itervalues():
9172
      dev.logical_id = new_logical_id
9173
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9174

    
9175
    self.cfg.Update(self.instance, feedback_fn)
9176

    
9177
    # and now perform the drbd attach
9178
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9179
                    " (standalone => connected)")
9180
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9181
                                            self.new_node],
9182
                                           self.node_secondary_ip,
9183
                                           self.instance.disks,
9184
                                           self.instance.name,
9185
                                           False)
9186
    for to_node, to_result in result.items():
9187
      msg = to_result.fail_msg
9188
      if msg:
9189
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9190
                           to_node, msg,
9191
                           hint=("please do a gnt-instance info to see the"
9192
                                 " status of disks"))
9193
    cstep = 5
9194
    if self.early_release:
9195
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9196
      cstep += 1
9197
      self._RemoveOldStorage(self.target_node, iv_names)
9198
      # WARNING: we release all node locks here, do not do other RPCs
9199
      # than WaitForSync to the primary node
9200
      self._ReleaseNodeLock([self.instance.primary_node,
9201
                             self.target_node,
9202
                             self.new_node])
9203

    
9204
    # Wait for sync
9205
    # This can fail as the old devices are degraded and _WaitForSync
9206
    # does a combined result over all disks, so we don't check its return value
9207
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9208
    cstep += 1
9209
    _WaitForSync(self.lu, self.instance)
9210

    
9211
    # Check all devices manually
9212
    self._CheckDevices(self.instance.primary_node, iv_names)
9213

    
9214
    # Step: remove old storage
9215
    if not self.early_release:
9216
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9217
      self._RemoveOldStorage(self.target_node, iv_names)
9218

    
9219

    
9220
class LURepairNodeStorage(NoHooksLU):
9221
  """Repairs the volume group on a node.
9222

9223
  """
9224
  REQ_BGL = False
9225

    
9226
  def CheckArguments(self):
9227
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9228

    
9229
    storage_type = self.op.storage_type
9230

    
9231
    if (constants.SO_FIX_CONSISTENCY not in
9232
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9233
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9234
                                 " repaired" % storage_type,
9235
                                 errors.ECODE_INVAL)
9236

    
9237
  def ExpandNames(self):
9238
    self.needed_locks = {
9239
      locking.LEVEL_NODE: [self.op.node_name],
9240
      }
9241

    
9242
  def _CheckFaultyDisks(self, instance, node_name):
9243
    """Ensure faulty disks abort the opcode or at least warn."""
9244
    try:
9245
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9246
                                  node_name, True):
9247
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9248
                                   " node '%s'" % (instance.name, node_name),
9249
                                   errors.ECODE_STATE)
9250
    except errors.OpPrereqError, err:
9251
      if self.op.ignore_consistency:
9252
        self.proc.LogWarning(str(err.args[0]))
9253
      else:
9254
        raise
9255

    
9256
  def CheckPrereq(self):
9257
    """Check prerequisites.
9258

9259
    """
9260
    # Check whether any instance on this node has faulty disks
9261
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9262
      if not inst.admin_up:
9263
        continue
9264
      check_nodes = set(inst.all_nodes)
9265
      check_nodes.discard(self.op.node_name)
9266
      for inst_node_name in check_nodes:
9267
        self._CheckFaultyDisks(inst, inst_node_name)
9268

    
9269
  def Exec(self, feedback_fn):
9270
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9271
                (self.op.name, self.op.node_name))
9272

    
9273
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9274
    result = self.rpc.call_storage_execute(self.op.node_name,
9275
                                           self.op.storage_type, st_args,
9276
                                           self.op.name,
9277
                                           constants.SO_FIX_CONSISTENCY)
9278
    result.Raise("Failed to repair storage unit '%s' on %s" %
9279
                 (self.op.name, self.op.node_name))
9280

    
9281

    
9282
class LUNodeEvacStrategy(NoHooksLU):
9283
  """Computes the node evacuation strategy.
9284

9285
  """
9286
  REQ_BGL = False
9287

    
9288
  def CheckArguments(self):
9289
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9290

    
9291
  def ExpandNames(self):
9292
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9293
    self.needed_locks = locks = {}
9294
    if self.op.remote_node is None:
9295
      locks[locking.LEVEL_NODE] = locking.ALL_SET
9296
    else:
9297
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9298
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9299

    
9300
  def Exec(self, feedback_fn):
9301
    if self.op.remote_node is not None:
9302
      instances = []
9303
      for node in self.op.nodes:
9304
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9305
      result = []
9306
      for i in instances:
9307
        if i.primary_node == self.op.remote_node:
9308
          raise errors.OpPrereqError("Node %s is the primary node of"
9309
                                     " instance %s, cannot use it as"
9310
                                     " secondary" %
9311
                                     (self.op.remote_node, i.name),
9312
                                     errors.ECODE_INVAL)
9313
        result.append([i.name, self.op.remote_node])
9314
    else:
9315
      ial = IAllocator(self.cfg, self.rpc,
9316
                       mode=constants.IALLOCATOR_MODE_MEVAC,
9317
                       evac_nodes=self.op.nodes)
9318
      ial.Run(self.op.iallocator, validate=True)
9319
      if not ial.success:
9320
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9321
                                 errors.ECODE_NORES)
9322
      result = ial.result
9323
    return result
9324

    
9325

    
9326
class LUInstanceGrowDisk(LogicalUnit):
9327
  """Grow a disk of an instance.
9328

9329
  """
9330
  HPATH = "disk-grow"
9331
  HTYPE = constants.HTYPE_INSTANCE
9332
  REQ_BGL = False
9333

    
9334
  def ExpandNames(self):
9335
    self._ExpandAndLockInstance()
9336
    self.needed_locks[locking.LEVEL_NODE] = []
9337
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9338

    
9339
  def DeclareLocks(self, level):
9340
    if level == locking.LEVEL_NODE:
9341
      self._LockInstancesNodes()
9342

    
9343
  def BuildHooksEnv(self):
9344
    """Build hooks env.
9345

9346
    This runs on the master, the primary and all the secondaries.
9347

9348
    """
9349
    env = {
9350
      "DISK": self.op.disk,
9351
      "AMOUNT": self.op.amount,
9352
      }
9353
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9354
    return env
9355

    
9356
  def BuildHooksNodes(self):
9357
    """Build hooks nodes.
9358

9359
    """
9360
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9361
    return (nl, nl)
9362

    
9363
  def CheckPrereq(self):
9364
    """Check prerequisites.
9365

9366
    This checks that the instance is in the cluster.
9367

9368
    """
9369
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9370
    assert instance is not None, \
9371
      "Cannot retrieve locked instance %s" % self.op.instance_name
9372
    nodenames = list(instance.all_nodes)
9373
    for node in nodenames:
9374
      _CheckNodeOnline(self, node)
9375

    
9376
    self.instance = instance
9377

    
9378
    if instance.disk_template not in constants.DTS_GROWABLE:
9379
      raise errors.OpPrereqError("Instance's disk layout does not support"
9380
                                 " growing.", errors.ECODE_INVAL)
9381

    
9382
    self.disk = instance.FindDisk(self.op.disk)
9383

    
9384
    if instance.disk_template not in (constants.DT_FILE,
9385
                                      constants.DT_SHARED_FILE):
9386
      # TODO: check the free disk space for file, when that feature will be
9387
      # supported
9388
      _CheckNodesFreeDiskPerVG(self, nodenames,
9389
                               self.disk.ComputeGrowth(self.op.amount))
9390

    
9391
  def Exec(self, feedback_fn):
9392
    """Execute disk grow.
9393

9394
    """
9395
    instance = self.instance
9396
    disk = self.disk
9397

    
9398
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9399
    if not disks_ok:
9400
      raise errors.OpExecError("Cannot activate block device to grow")
9401

    
9402
    for node in instance.all_nodes:
9403
      self.cfg.SetDiskID(disk, node)
9404
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9405
      result.Raise("Grow request failed to node %s" % node)
9406

    
9407
      # TODO: Rewrite code to work properly
9408
      # DRBD goes into sync mode for a short amount of time after executing the
9409
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9410
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9411
      # time is a work-around.
9412
      time.sleep(5)
9413

    
9414
    disk.RecordGrow(self.op.amount)
9415
    self.cfg.Update(instance, feedback_fn)
9416
    if self.op.wait_for_sync:
9417
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9418
      if disk_abort:
9419
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9420
                             " status.\nPlease check the instance.")
9421
      if not instance.admin_up:
9422
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9423
    elif not instance.admin_up:
9424
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9425
                           " not supposed to be running because no wait for"
9426
                           " sync mode was requested.")
9427

    
9428

    
9429
class LUInstanceQueryData(NoHooksLU):
9430
  """Query runtime instance data.
9431

9432
  """
9433
  REQ_BGL = False
9434

    
9435
  def ExpandNames(self):
9436
    self.needed_locks = {}
9437

    
9438
    # Use locking if requested or when non-static information is wanted
9439
    if not (self.op.static or self.op.use_locking):
9440
      self.LogWarning("Non-static data requested, locks need to be acquired")
9441
      self.op.use_locking = True
9442

    
9443
    if self.op.instances or not self.op.use_locking:
9444
      # Expand instance names right here
9445
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
9446
    else:
9447
      # Will use acquired locks
9448
      self.wanted_names = None
9449

    
9450
    if self.op.use_locking:
9451
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9452

    
9453
      if self.wanted_names is None:
9454
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9455
      else:
9456
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9457

    
9458
      self.needed_locks[locking.LEVEL_NODE] = []
9459
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9460
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9461

    
9462
  def DeclareLocks(self, level):
9463
    if self.op.use_locking and level == locking.LEVEL_NODE:
9464
      self._LockInstancesNodes()
9465

    
9466
  def CheckPrereq(self):
9467
    """Check prerequisites.
9468

9469
    This only checks the optional instance list against the existing names.
9470

9471
    """
9472
    if self.wanted_names is None:
9473
      assert self.op.use_locking, "Locking was not used"
9474
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9475

    
9476
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9477
                             for name in self.wanted_names]
9478

    
9479
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9480
    """Returns the status of a block device
9481

9482
    """
9483
    if self.op.static or not node:
9484
      return None
9485

    
9486
    self.cfg.SetDiskID(dev, node)
9487

    
9488
    result = self.rpc.call_blockdev_find(node, dev)
9489
    if result.offline:
9490
      return None
9491

    
9492
    result.Raise("Can't compute disk status for %s" % instance_name)
9493

    
9494
    status = result.payload
9495
    if status is None:
9496
      return None
9497

    
9498
    return (status.dev_path, status.major, status.minor,
9499
            status.sync_percent, status.estimated_time,
9500
            status.is_degraded, status.ldisk_status)
9501

    
9502
  def _ComputeDiskStatus(self, instance, snode, dev):
9503
    """Compute block device status.
9504

9505
    """
9506
    if dev.dev_type in constants.LDS_DRBD:
9507
      # we change the snode then (otherwise we use the one passed in)
9508
      if dev.logical_id[0] == instance.primary_node:
9509
        snode = dev.logical_id[1]
9510
      else:
9511
        snode = dev.logical_id[0]
9512

    
9513
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9514
                                              instance.name, dev)
9515
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9516

    
9517
    if dev.children:
9518
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9519
                      for child in dev.children]
9520
    else:
9521
      dev_children = []
9522

    
9523
    return {
9524
      "iv_name": dev.iv_name,
9525
      "dev_type": dev.dev_type,
9526
      "logical_id": dev.logical_id,
9527
      "physical_id": dev.physical_id,
9528
      "pstatus": dev_pstatus,
9529
      "sstatus": dev_sstatus,
9530
      "children": dev_children,
9531
      "mode": dev.mode,
9532
      "size": dev.size,
9533
      }
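    # The "children" entries above have this same shape, so e.g. a DRBD8 disk
    # yields a dict whose children describe its underlying data and metadata
    # LVs.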
9534

    
9535
  def Exec(self, feedback_fn):
9536
    """Gather and return data"""
9537
    result = {}
9538

    
9539
    cluster = self.cfg.GetClusterInfo()
9540

    
9541
    for instance in self.wanted_instances:
9542
      if not self.op.static:
9543
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9544
                                                  instance.name,
9545
                                                  instance.hypervisor)
9546
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9547
        remote_info = remote_info.payload
9548
        if remote_info and "state" in remote_info:
9549
          remote_state = "up"
9550
        else:
9551
          remote_state = "down"
9552
      else:
9553
        remote_state = None
9554
      if instance.admin_up:
9555
        config_state = "up"
9556
      else:
9557
        config_state = "down"
9558

    
9559
      disks = [self._ComputeDiskStatus(instance, None, device)
9560
               for device in instance.disks]
9561

    
9562
      result[instance.name] = {
9563
        "name": instance.name,
9564
        "config_state": config_state,
9565
        "run_state": remote_state,
9566
        "pnode": instance.primary_node,
9567
        "snodes": instance.secondary_nodes,
9568
        "os": instance.os,
9569
        # this happens to be the same format used for hooks
9570
        "nics": _NICListToTuple(self, instance.nics),
9571
        "disk_template": instance.disk_template,
9572
        "disks": disks,
9573
        "hypervisor": instance.hypervisor,
9574
        "network_port": instance.network_port,
9575
        "hv_instance": instance.hvparams,
9576
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9577
        "be_instance": instance.beparams,
9578
        "be_actual": cluster.FillBE(instance),
9579
        "os_instance": instance.osparams,
9580
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9581
        "serial_no": instance.serial_no,
9582
        "mtime": instance.mtime,
9583
        "ctime": instance.ctime,
9584
        "uuid": instance.uuid,
9585
        }
9586

    
9587
    return result
9588

    
9589

    
9590
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
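    # Each entry in self.op.disks is an (operation, parameters) pair; the
    # operation is DDM_ADD, DDM_REMOVE or the index of an existing disk.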
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
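        # memory needed on top of what is currently free on the primary node;
        # the instance's current allocation counts as available again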
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
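    # nic_pinst keeps the parameters exactly as submitted (no defaults),
    # nic_pnew the same values filled with cluster defaults, used for
    # validation and for the hooks environment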
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

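      # the deprecated 'bridge' parameter is an alias for the link parameter
      # and is folded into it here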
      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
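    # children[0] of each new DRBD disk is its data LV; renaming the original
    # plain LVs to those names reuses the existing data in place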
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
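    # add/remove operations always act on the last disk slot; index-based
    # entries only change the access mode of an existing disk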
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

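  # Dispatch table mapping (current template, requested template) pairs to
  # the conversion helpers above; only the pairs listed here are supported.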
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
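    # nodes that failed to answer are reported as False instead of a list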
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

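      # in remote mode, target_node carries one (host, port, magic) tuple per
      # instance disk, verified against the cluster domain secret, instead of
      # a node name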
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

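    # fin_resu is the overall export finalization status, dresults holds one
    # boolean per exported disk; both must be positive for success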
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information.
    self.node_data = self.cfg.GetAllNodesInfo()
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
    affected_groups.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: list(affected_groups),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
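    # only consider assignments that actually move a node to a new group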
    changed_nodes = dict((node, group) for node, group in changes
10838
                         if node_data[node].group != group)
10839

    
10840
    all_split_instances = set()
10841
    previously_split_instances = set()
10842

    
10843
    def InstanceNodes(instance):
10844
      return [instance.primary_node] + list(instance.secondary_nodes)
10845

    
10846
    for inst in instance_data.values():
10847
      if inst.disk_template not in constants.DTS_INT_MIRROR:
10848
        continue
10849

    
10850
      instance_nodes = InstanceNodes(inst)
10851

    
10852
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
10853
        previously_split_instances.add(inst.name)
10854

    
10855
      if len(set(changed_nodes.get(node, node_data[node].group)
10856
                 for node in instance_nodes)) > 1:
10857
        all_split_instances.add(inst.name)
10858

    
10859
    return (list(all_split_instances - previously_split_instances),
10860
            list(previously_split_instances & all_split_instances))
10861
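  # Illustrative sketch (not part of the original code) of how the static check
  # above behaves; the names "node1", "node2", "g1", "g2" and "inst1" are made
  # up. For a DRBD instance "inst1" with primary "node1" and secondary "node2",
  # both currently in group "g1", reassigning only "node1" to group "g2" makes
  # the instance newly split:
  #
  #   changes = [("node1", "g2")]
  #   (new, old) = CheckAssignmentForSplitInstances(changes, node_data,
  #                                                 instance_data)
  #   # new == ["inst1"], old == []   (it was not split before the change)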

    
10862

    
10863
class _GroupQuery(_QueryBase):
10864
  FIELDS = query.GROUP_FIELDS
10865

    
10866
  def ExpandNames(self, lu):
10867
    lu.needed_locks = {}
10868

    
10869
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10870
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10871

    
10872
    if not self.names:
10873
      self.wanted = [name_to_uuid[name]
10874
                     for name in utils.NiceSort(name_to_uuid.keys())]
10875
    else:
10876
      # Entries in self.names may be either group names or UUIDs.
10877
      missing = []
10878
      self.wanted = []
10879
      all_uuid = frozenset(self._all_groups.keys())
10880

    
10881
      for name in self.names:
10882
        if name in all_uuid:
10883
          self.wanted.append(name)
10884
        elif name in name_to_uuid:
10885
          self.wanted.append(name_to_uuid[name])
10886
        else:
10887
          missing.append(name)
10888

    
10889
      if missing:
10890
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10891
                                   errors.ECODE_NOENT)
10892

    
10893
  def DeclareLocks(self, lu, level):
10894
    pass
10895

    
10896
  def _GetQueryData(self, lu):
10897
    """Computes the list of node groups and their attributes.
10898

10899
    """
10900
    do_nodes = query.GQ_NODE in self.requested_data
10901
    do_instances = query.GQ_INST in self.requested_data
10902

    
10903
    group_to_nodes = None
10904
    group_to_instances = None
10905

    
10906
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10907
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10908
    # latter GetAllInstancesInfo() is not enough, for we have to go through
10909
    # instance->node. Hence, we will need to process nodes even if we only need
10910
    # instance information.
10911
    if do_nodes or do_instances:
10912
      all_nodes = lu.cfg.GetAllNodesInfo()
10913
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10914
      node_to_group = {}
10915

    
10916
      for node in all_nodes.values():
10917
        if node.group in group_to_nodes:
10918
          group_to_nodes[node.group].append(node.name)
10919
          node_to_group[node.name] = node.group
10920

    
10921
      if do_instances:
10922
        all_instances = lu.cfg.GetAllInstancesInfo()
10923
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
10924

    
10925
        for instance in all_instances.values():
10926
          node = instance.primary_node
10927
          if node in node_to_group:
10928
            group_to_instances[node_to_group[node]].append(instance.name)
10929

    
10930
        if not do_nodes:
10931
          # Do not pass on node information if it was not requested.
10932
          group_to_nodes = None
10933

    
10934
    return query.GroupQueryData([self._all_groups[uuid]
10935
                                 for uuid in self.wanted],
10936
                                group_to_nodes, group_to_instances)
10937
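  # Rough sketch (made-up names, not part of the original code) of the shape of
  # the mappings built above for two hypothetical groups:
  #
  #   group_to_nodes = {
  #     "uuid-g1": ["node1.example.com", "node2.example.com"],
  #     "uuid-g2": ["node3.example.com"],
  #     }
  #   group_to_instances = {
  #     "uuid-g1": ["inst1.example.com"],  # keyed by the primary node's group
  #     "uuid-g2": [],
  #     }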

    
10938

    
10939
class LUGroupQuery(NoHooksLU):
10940
  """Logical unit for querying node groups.
10941

10942
  """
10943
  REQ_BGL = False
10944

    
10945
  def CheckArguments(self):
10946
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10947
                          self.op.output_fields, False)
10948

    
10949
  def ExpandNames(self):
10950
    self.gq.ExpandNames(self)
10951

    
10952
  def Exec(self, feedback_fn):
10953
    return self.gq.OldStyleQuery(self)
10954

    
10955

    
10956
class LUGroupSetParams(LogicalUnit):
10957
  """Modifies the parameters of a node group.
10958

10959
  """
10960
  HPATH = "group-modify"
10961
  HTYPE = constants.HTYPE_GROUP
10962
  REQ_BGL = False
10963

    
10964
  def CheckArguments(self):
10965
    all_changes = [
10966
      self.op.ndparams,
10967
      self.op.alloc_policy,
10968
      ]
10969

    
10970
    if all_changes.count(None) == len(all_changes):
10971
      raise errors.OpPrereqError("Please pass at least one modification",
10972
                                 errors.ECODE_INVAL)
10973

    
10974
  def ExpandNames(self):
10975
    # This raises errors.OpPrereqError on its own:
10976
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10977

    
10978
    self.needed_locks = {
10979
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10980
      }
10981

    
10982
  def CheckPrereq(self):
10983
    """Check prerequisites.
10984

10985
    """
10986
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10987

    
10988
    if self.group is None:
10989
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10990
                               (self.op.group_name, self.group_uuid))
10991

    
10992
    if self.op.ndparams:
10993
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10994
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
10995
      self.new_ndparams = new_ndparams
10996

    
10997
  def BuildHooksEnv(self):
10998
    """Build hooks env.
10999

11000
    """
11001
    return {
11002
      "GROUP_NAME": self.op.group_name,
11003
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11004
      }
11005

    
11006
  def BuildHooksNodes(self):
11007
    """Build hooks nodes.
11008

11009
    """
11010
    mn = self.cfg.GetMasterNode()
11011
    return ([mn], [mn])
11012

    
11013
  def Exec(self, feedback_fn):
11014
    """Modifies the node group.
11015

11016
    """
11017
    result = []
11018

    
11019
    if self.op.ndparams:
11020
      self.group.ndparams = self.new_ndparams
11021
      result.append(("ndparams", str(self.group.ndparams)))
11022

    
11023
    if self.op.alloc_policy:
11024
      self.group.alloc_policy = self.op.alloc_policy
11025

    
11026
    self.cfg.Update(self.group, feedback_fn)
11027
    return result
11028

    
11029

    
11030

    
11031
class LUGroupRemove(LogicalUnit):
11032
  HPATH = "group-remove"
11033
  HTYPE = constants.HTYPE_GROUP
11034
  REQ_BGL = False
11035

    
11036
  def ExpandNames(self):
11037
    # This raises errors.OpPrereqError on its own:
11038
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11039
    self.needed_locks = {
11040
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11041
      }
11042

    
11043
  def CheckPrereq(self):
11044
    """Check prerequisites.
11045

11046
    This checks that the given group name exists as a node group, that it is
11047
    empty (i.e., contains no nodes), and that it is not the last group of the
11048
    cluster.
11049

11050
    """
11051
    # Verify that the group is empty.
11052
    group_nodes = [node.name
11053
                   for node in self.cfg.GetAllNodesInfo().values()
11054
                   if node.group == self.group_uuid]
11055

    
11056
    if group_nodes:
11057
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11058
                                 " nodes: %s" %
11059
                                 (self.op.group_name,
11060
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11061
                                 errors.ECODE_STATE)
11062

    
11063
    # Verify the cluster would not be left group-less.
11064
    if len(self.cfg.GetNodeGroupList()) == 1:
11065
      raise errors.OpPrereqError("Group '%s' is the only group,"
11066
                                 " cannot be removed" %
11067
                                 self.op.group_name,
11068
                                 errors.ECODE_STATE)
11069

    
11070
  def BuildHooksEnv(self):
11071
    """Build hooks env.
11072

11073
    """
11074
    return {
11075
      "GROUP_NAME": self.op.group_name,
11076
      }
11077

    
11078
  def BuildHooksNodes(self):
11079
    """Build hooks nodes.
11080

11081
    """
11082
    mn = self.cfg.GetMasterNode()
11083
    return ([mn], [mn])
11084

    
11085
  def Exec(self, feedback_fn):
11086
    """Remove the node group.
11087

11088
    """
11089
    try:
11090
      self.cfg.RemoveNodeGroup(self.group_uuid)
11091
    except errors.ConfigurationError:
11092
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11093
                               (self.op.group_name, self.group_uuid))
11094

    
11095
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11096

    
11097

    
11098
class LUGroupRename(LogicalUnit):
11099
  HPATH = "group-rename"
11100
  HTYPE = constants.HTYPE_GROUP
11101
  REQ_BGL = False
11102

    
11103
  def ExpandNames(self):
11104
    # This raises errors.OpPrereqError on its own:
11105
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11106

    
11107
    self.needed_locks = {
11108
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11109
      }
11110

    
11111
  def CheckPrereq(self):
11112
    """Check prerequisites.
11113

11114
    Ensures requested new name is not yet used.
11115

11116
    """
11117
    try:
11118
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11119
    except errors.OpPrereqError:
11120
      pass
11121
    else:
11122
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11123
                                 " node group (UUID: %s)" %
11124
                                 (self.op.new_name, new_name_uuid),
11125
                                 errors.ECODE_EXISTS)
11126

    
11127
  def BuildHooksEnv(self):
11128
    """Build hooks env.
11129

11130
    """
11131
    return {
11132
      "OLD_NAME": self.op.group_name,
11133
      "NEW_NAME": self.op.new_name,
11134
      }
11135

    
11136
  def BuildHooksNodes(self):
11137
    """Build hooks nodes.
11138

11139
    """
11140
    mn = self.cfg.GetMasterNode()
11141

    
11142
    all_nodes = self.cfg.GetAllNodesInfo()
11143
    all_nodes.pop(mn, None)
11144

    
11145
    run_nodes = [mn]
11146
    run_nodes.extend(node.name for node in all_nodes.values()
11147
                     if node.group == self.group_uuid)
11148

    
11149
    return (run_nodes, run_nodes)
11150

    
11151
  def Exec(self, feedback_fn):
11152
    """Rename the node group.
11153

11154
    """
11155
    group = self.cfg.GetNodeGroup(self.group_uuid)
11156

    
11157
    if group is None:
11158
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11159
                               (self.op.group_name, self.group_uuid))
11160

    
11161
    group.name = self.op.new_name
11162
    self.cfg.Update(group, feedback_fn)
11163

    
11164
    return self.op.new_name
11165

    
11166

    
11167
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11168
  """Generic tags LU.
11169

11170
  This is an abstract class which is the parent of all the other tags LUs.
11171

11172
  """
11173
  def ExpandNames(self):
11174
    self.group_uuid = None
11175
    self.needed_locks = {}
11176
    if self.op.kind == constants.TAG_NODE:
11177
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11178
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
11179
    elif self.op.kind == constants.TAG_INSTANCE:
11180
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11181
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11182
    elif self.op.kind == constants.TAG_NODEGROUP:
11183
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11184

    
11185
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11186
    # not possible to acquire the BGL based on opcode parameters)
11187

    
11188
  def CheckPrereq(self):
11189
    """Check prerequisites.
11190

11191
    """
11192
    if self.op.kind == constants.TAG_CLUSTER:
11193
      self.target = self.cfg.GetClusterInfo()
11194
    elif self.op.kind == constants.TAG_NODE:
11195
      self.target = self.cfg.GetNodeInfo(self.op.name)
11196
    elif self.op.kind == constants.TAG_INSTANCE:
11197
      self.target = self.cfg.GetInstanceInfo(self.op.name)
11198
    elif self.op.kind == constants.TAG_NODEGROUP:
11199
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
11200
    else:
11201
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11202
                                 str(self.op.kind), errors.ECODE_INVAL)
11203

    
11204

    
11205
class LUTagsGet(TagsLU):
11206
  """Returns the tags of a given object.
11207

11208
  """
11209
  REQ_BGL = False
11210

    
11211
  def ExpandNames(self):
11212
    TagsLU.ExpandNames(self)
11213

    
11214
    # Share locks as this is only a read operation
11215
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11216

    
11217
  def Exec(self, feedback_fn):
11218
    """Returns the tag list.
11219

11220
    """
11221
    return list(self.target.GetTags())
11222

    
11223

    
11224
class LUTagsSearch(NoHooksLU):
11225
  """Searches the tags for a given pattern.
11226

11227
  """
11228
  REQ_BGL = False
11229

    
11230
  def ExpandNames(self):
11231
    self.needed_locks = {}
11232

    
11233
  def CheckPrereq(self):
11234
    """Check prerequisites.
11235

11236
    This checks the pattern passed for validity by compiling it.
11237

11238
    """
11239
    try:
11240
      self.re = re.compile(self.op.pattern)
11241
    except re.error, err:
11242
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11243
                                 (self.op.pattern, err), errors.ECODE_INVAL)
11244

    
11245
  def Exec(self, feedback_fn):
11246
    """Returns the tag list.
11247

11248
    """
11249
    cfg = self.cfg
11250
    tgts = [("/cluster", cfg.GetClusterInfo())]
11251
    ilist = cfg.GetAllInstancesInfo().values()
11252
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11253
    nlist = cfg.GetAllNodesInfo().values()
11254
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11255
    tgts.extend(("/nodegroup/%s" % n.name, n)
11256
                for n in cfg.GetAllNodeGroupsInfo().values())
11257
    results = []
11258
    for path, target in tgts:
11259
      for tag in target.GetTags():
11260
        if self.re.search(tag):
11261
          results.append((path, tag))
11262
    return results
11263
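  # Sketch of the result format produced above, with made-up data: searching
  # for the pattern "^env:" could, for example, return
  #
  #   [("/cluster", "env:prod"),
  #    ("/instances/web1.example.com", "env:prod"),
  #    ("/nodes/node1.example.com", "env:test")]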

    
11264

    
11265
class LUTagsSet(TagsLU):
11266
  """Sets a tag on a given object.
11267

11268
  """
11269
  REQ_BGL = False
11270

    
11271
  def CheckPrereq(self):
11272
    """Check prerequisites.
11273

11274
    This checks the type and length of the tag name and value.
11275

11276
    """
11277
    TagsLU.CheckPrereq(self)
11278
    for tag in self.op.tags:
11279
      objects.TaggableObject.ValidateTag(tag)
11280

    
11281
  def Exec(self, feedback_fn):
11282
    """Sets the tag.
11283

11284
    """
11285
    try:
11286
      for tag in self.op.tags:
11287
        self.target.AddTag(tag)
11288
    except errors.TagError, err:
11289
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
11290
    self.cfg.Update(self.target, feedback_fn)
11291

    
11292

    
11293
class LUTagsDel(TagsLU):
11294
  """Delete a list of tags from a given object.
11295

11296
  """
11297
  REQ_BGL = False
11298

    
11299
  def CheckPrereq(self):
11300
    """Check prerequisites.
11301

11302
    This checks that we have the given tag.
11303

11304
    """
11305
    TagsLU.CheckPrereq(self)
11306
    for tag in self.op.tags:
11307
      objects.TaggableObject.ValidateTag(tag)
11308
    del_tags = frozenset(self.op.tags)
11309
    cur_tags = self.target.GetTags()
11310

    
11311
    diff_tags = del_tags - cur_tags
11312
    if diff_tags:
11313
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11314
      raise errors.OpPrereqError("Tag(s) %s not found" %
11315
                                 (utils.CommaJoin(diff_names), ),
11316
                                 errors.ECODE_NOENT)
11317

    
11318
  def Exec(self, feedback_fn):
11319
    """Remove the tag from the object.
11320

11321
    """
11322
    for tag in self.op.tags:
11323
      self.target.RemoveTag(tag)
11324
    self.cfg.Update(self.target, feedback_fn)
11325

    
11326

    
11327
class LUTestDelay(NoHooksLU):
11328
  """Sleep for a specified amount of time.
11329

11330
  This LU sleeps on the master and/or nodes for a specified amount of
11331
  time.
11332

11333
  """
11334
  REQ_BGL = False
11335

    
11336
  def ExpandNames(self):
11337
    """Expand names and set required locks.
11338

11339
    This expands the node list, if any.
11340

11341
    """
11342
    self.needed_locks = {}
11343
    if self.op.on_nodes:
11344
      # _GetWantedNodes can be used here, but is not always appropriate to use
11345
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11346
      # more information.
11347
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11348
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11349

    
11350
  def _TestDelay(self):
11351
    """Do the actual sleep.
11352

11353
    """
11354
    if self.op.on_master:
11355
      if not utils.TestDelay(self.op.duration):
11356
        raise errors.OpExecError("Error during master delay test")
11357
    if self.op.on_nodes:
11358
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11359
      for node, node_result in result.items():
11360
        node_result.Raise("Failure during rpc call to node %s" % node)
11361

    
11362
  def Exec(self, feedback_fn):
11363
    """Execute the test delay opcode, with the wanted repetitions.
11364

11365
    """
11366
    if self.op.repeat == 0:
11367
      self._TestDelay()
11368
    else:
11369
      top_value = self.op.repeat - 1
11370
      for i in range(self.op.repeat):
11371
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11372
        self._TestDelay()
11373

    
11374

    
11375
class LUTestJqueue(NoHooksLU):
11376
  """Utility LU to test some aspects of the job queue.
11377

11378
  """
11379
  REQ_BGL = False
11380

    
11381
  # Must be lower than default timeout for WaitForJobChange to see whether it
11382
  # notices changed jobs
11383
  _CLIENT_CONNECT_TIMEOUT = 20.0
11384
  _CLIENT_CONFIRM_TIMEOUT = 60.0
11385

    
11386
  @classmethod
11387
  def _NotifyUsingSocket(cls, cb, errcls):
11388
    """Opens a Unix socket and waits for another program to connect.
11389

11390
    @type cb: callable
11391
    @param cb: Callback to send socket name to client
11392
    @type errcls: class
11393
    @param errcls: Exception class to use for errors
11394

11395
    """
11396
    # Using a temporary directory as there's no easy way to create temporary
11397
    # sockets without writing a custom loop around tempfile.mktemp and
11398
    # socket.bind
11399
    tmpdir = tempfile.mkdtemp()
11400
    try:
11401
      tmpsock = utils.PathJoin(tmpdir, "sock")
11402

    
11403
      logging.debug("Creating temporary socket at %s", tmpsock)
11404
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11405
      try:
11406
        sock.bind(tmpsock)
11407
        sock.listen(1)
11408

    
11409
        # Send details to client
11410
        cb(tmpsock)
11411

    
11412
        # Wait for client to connect before continuing
11413
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11414
        try:
11415
          (conn, _) = sock.accept()
11416
        except socket.error, err:
11417
          raise errcls("Client didn't connect in time (%s)" % err)
11418
      finally:
11419
        sock.close()
11420
    finally:
11421
      # Remove as soon as client is connected
11422
      shutil.rmtree(tmpdir)
11423

    
11424
    # Wait for client to close
11425
    try:
11426
      try:
11427
        # pylint: disable-msg=E1101
11428
        # Instance of '_socketobject' has no ... member
11429
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11430
        conn.recv(1)
11431
      except socket.error, err:
11432
        raise errcls("Client failed to confirm notification (%s)" % err)
11433
    finally:
11434
      conn.close()
11435
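  # A minimal sketch (an assumption, not part of the original code) of what the
  # client on the other end of _NotifyUsingSocket is expected to do: connect to
  # the advertised socket path within _CLIENT_CONNECT_TIMEOUT and then send a
  # single byte to confirm the notification:
  #
  #   import socket
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   sock.connect(sockname)  # the path passed to the callback
  #   sock.send("x")          # any byte acknowledges the notification
  #   sock.close()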

    
11436
  def _SendNotification(self, test, arg, sockname):
11437
    """Sends a notification to the client.
11438

11439
    @type test: string
11440
    @param test: Test name
11441
    @param arg: Test argument (depends on test)
11442
    @type sockname: string
11443
    @param sockname: Socket path
11444

11445
    """
11446
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11447

    
11448
  def _Notify(self, prereq, test, arg):
11449
    """Notifies the client of a test.
11450

11451
    @type prereq: bool
11452
    @param prereq: Whether this is a prereq-phase test
11453
    @type test: string
11454
    @param test: Test name
11455
    @param arg: Test argument (depends on test)
11456

11457
    """
11458
    if prereq:
11459
      errcls = errors.OpPrereqError
11460
    else:
11461
      errcls = errors.OpExecError
11462

    
11463
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11464
                                                  test, arg),
11465
                                   errcls)
11466

    
11467
  def CheckArguments(self):
11468
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11469
    self.expandnames_calls = 0
11470

    
11471
  def ExpandNames(self):
11472
    checkargs_calls = getattr(self, "checkargs_calls", 0)
11473
    if checkargs_calls < 1:
11474
      raise errors.ProgrammerError("CheckArguments was not called")
11475

    
11476
    self.expandnames_calls += 1
11477

    
11478
    if self.op.notify_waitlock:
11479
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
11480

    
11481
    self.LogInfo("Expanding names")
11482

    
11483
    # Get lock on master node (just to get a lock, not for a particular reason)
11484
    self.needed_locks = {
11485
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11486
      }
11487

    
11488
  def Exec(self, feedback_fn):
11489
    if self.expandnames_calls < 1:
11490
      raise errors.ProgrammerError("ExpandNames was not called")
11491

    
11492
    if self.op.notify_exec:
11493
      self._Notify(False, constants.JQT_EXEC, None)
11494

    
11495
    self.LogInfo("Executing")
11496

    
11497
    if self.op.log_messages:
11498
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11499
      for idx, msg in enumerate(self.op.log_messages):
11500
        self.LogInfo("Sending log message %s", idx + 1)
11501
        feedback_fn(constants.JQT_MSGPREFIX + msg)
11502
        # Report how many test messages have been sent
11503
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11504

    
11505
    if self.op.fail:
11506
      raise errors.OpExecError("Opcode failure was requested")
11507

    
11508
    return True
11509

    
11510

    
11511
class IAllocator(object):
11512
  """IAllocator framework.
11513

11514
  An IAllocator instance has four sets of attributes:
11515
    - cfg that is needed to query the cluster
11516
    - input data (all members of the _KEYS class attribute are required)
11517
    - four buffer attributes (in|out_data|text), that represent the
11518
      input (to the external script) in text and data structure format,
11519
      and the output from it, again in two formats
11520
    - the result variables from the script (success, info, nodes) for
11521
      easy usage
11522

11523
  """
11524
  # pylint: disable-msg=R0902
11525
  # lots of instance attributes
11526
  _ALLO_KEYS = [
11527
    "name", "mem_size", "disks", "disk_template",
11528
    "os", "tags", "nics", "vcpus", "hypervisor",
11529
    ]
11530
  _RELO_KEYS = [
11531
    "name", "relocate_from",
11532
    ]
11533
  _EVAC_KEYS = [
11534
    "evac_nodes",
11535
    ]
11536

    
11537
  def __init__(self, cfg, rpc, mode, **kwargs):
11538
    self.cfg = cfg
11539
    self.rpc = rpc
11540
    # init buffer variables
11541
    self.in_text = self.out_text = self.in_data = self.out_data = None
11542
    # init all input fields so that pylint is happy
11543
    self.mode = mode
11544
    self.mem_size = self.disks = self.disk_template = None
11545
    self.os = self.tags = self.nics = self.vcpus = None
11546
    self.hypervisor = None
11547
    self.relocate_from = None
11548
    self.name = None
11549
    self.evac_nodes = None
11550
    # computed fields
11551
    self.required_nodes = None
11552
    # init result fields
11553
    self.success = self.info = self.result = None
11554
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11555
      keyset = self._ALLO_KEYS
11556
      fn = self._AddNewInstance
11557
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11558
      keyset = self._RELO_KEYS
11559
      fn = self._AddRelocateInstance
11560
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11561
      keyset = self._EVAC_KEYS
11562
      fn = self._AddEvacuateNodes
11563
    else:
11564
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11565
                                   " IAllocator" % self.mode)
11566
    for key in kwargs:
11567
      if key not in keyset:
11568
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
11569
                                     " IAllocator" % key)
11570
      setattr(self, key, kwargs[key])
11571

    
11572
    for key in keyset:
11573
      if key not in kwargs:
11574
        raise errors.ProgrammerError("Missing input parameter '%s' to"
11575
                                     " IAllocator" % key)
11576
    self._BuildInputData(fn)
11577

    
11578
  def _ComputeClusterData(self):
11579
    """Compute the generic allocator input data.
11580

11581
    This is the data that is independent of the actual operation.
11582

11583
    """
11584
    cfg = self.cfg
11585
    cluster_info = cfg.GetClusterInfo()
11586
    # cluster data
11587
    data = {
11588
      "version": constants.IALLOCATOR_VERSION,
11589
      "cluster_name": cfg.GetClusterName(),
11590
      "cluster_tags": list(cluster_info.GetTags()),
11591
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11592
      # we don't have job IDs
11593
      }
11594
    ninfo = cfg.GetAllNodesInfo()
11595
    iinfo = cfg.GetAllInstancesInfo().values()
11596
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11597

    
11598
    # node data
11599
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
11600

    
11601
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11602
      hypervisor_name = self.hypervisor
11603
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11604
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11605
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11606
      hypervisor_name = cluster_info.enabled_hypervisors[0]
11607

    
11608
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11609
                                        hypervisor_name)
11610
    node_iinfo = \
11611
      self.rpc.call_all_instances_info(node_list,
11612
                                       cluster_info.enabled_hypervisors)
11613

    
11614
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11615

    
11616
    config_ndata = self._ComputeBasicNodeData(ninfo)
11617
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11618
                                                 i_list, config_ndata)
11619
    assert len(data["nodes"]) == len(ninfo), \
11620
        "Incomplete node data computed"
11621

    
11622
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11623

    
11624
    self.in_data = data
11625

    
11626
  @staticmethod
11627
  def _ComputeNodeGroupData(cfg):
11628
    """Compute node groups data.
11629

11630
    """
11631
    ng = {}
11632
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11633
      ng[guuid] = {
11634
        "name": gdata.name,
11635
        "alloc_policy": gdata.alloc_policy,
11636
        }
11637
    return ng
11638

    
11639
  @staticmethod
11640
  def _ComputeBasicNodeData(node_cfg):
11641
    """Compute global node data.
11642

11643
    @rtype: dict
11644
    @return: a dict of node name to static (config-derived) node attributes
11645

11646
    """
11647
    node_results = {}
11648
    for ninfo in node_cfg.values():
11649
      # fill in static (config-based) values
11650
      pnr = {
11651
        "tags": list(ninfo.GetTags()),
11652
        "primary_ip": ninfo.primary_ip,
11653
        "secondary_ip": ninfo.secondary_ip,
11654
        "offline": ninfo.offline,
11655
        "drained": ninfo.drained,
11656
        "master_candidate": ninfo.master_candidate,
11657
        "group": ninfo.group,
11658
        "master_capable": ninfo.master_capable,
11659
        "vm_capable": ninfo.vm_capable,
11660
        }
11661

    
11662
      node_results[ninfo.name] = pnr
11663

    
11664
    return node_results
11665

    
11666
  @staticmethod
11667
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11668
                              node_results):
11669
    """Compute global node data.
11670

11671
    @param node_results: the basic node structures as filled from the config
11672

11673
    """
11674
    # make a copy of the current dict
11675
    node_results = dict(node_results)
11676
    for nname, nresult in node_data.items():
11677
      assert nname in node_results, "Missing basic data for node %s" % nname
11678
      ninfo = node_cfg[nname]
11679

    
11680
      if not (ninfo.offline or ninfo.drained):
11681
        nresult.Raise("Can't get data for node %s" % nname)
11682
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11683
                                nname)
11684
        remote_info = nresult.payload
11685

    
11686
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
11687
                     'vg_size', 'vg_free', 'cpu_total']:
11688
          if attr not in remote_info:
11689
            raise errors.OpExecError("Node '%s' didn't return attribute"
11690
                                     " '%s'" % (nname, attr))
11691
          if not isinstance(remote_info[attr], int):
11692
            raise errors.OpExecError("Node '%s' returned invalid value"
11693
                                     " for '%s': %s" %
11694
                                     (nname, attr, remote_info[attr]))
11695
        # compute memory used by primary instances
11696
        i_p_mem = i_p_up_mem = 0
11697
        for iinfo, beinfo in i_list:
11698
          if iinfo.primary_node == nname:
11699
            i_p_mem += beinfo[constants.BE_MEMORY]
11700
            if iinfo.name not in node_iinfo[nname].payload:
11701
              i_used_mem = 0
11702
            else:
11703
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11704
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11705
            remote_info['memory_free'] -= max(0, i_mem_diff)
11706

    
11707
            if iinfo.admin_up:
11708
              i_p_up_mem += beinfo[constants.BE_MEMORY]
11709

    
11710
        # compute memory used by instances
11711
        pnr_dyn = {
11712
          "total_memory": remote_info['memory_total'],
11713
          "reserved_memory": remote_info['memory_dom0'],
11714
          "free_memory": remote_info['memory_free'],
11715
          "total_disk": remote_info['vg_size'],
11716
          "free_disk": remote_info['vg_free'],
11717
          "total_cpus": remote_info['cpu_total'],
11718
          "i_pri_memory": i_p_mem,
11719
          "i_pri_up_memory": i_p_up_mem,
11720
          }
11721
        pnr_dyn.update(node_results[nname])
11722
        node_results[nname] = pnr_dyn
11723

    
11724
    return node_results
11725
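  # Worked example (hypothetical numbers) for the free-memory correction above:
  # an instance with BE_MEMORY of 1024 MiB that the hypervisor reports as using
  # only 512 MiB leads to
  #
  #   i_mem_diff = 1024 - 512            # 512 MiB not yet claimed
  #   remote_info['memory_free'] -= 512  # reserved so it is not handed out
  #                                      # twice by the allocator
  #
  # whereas an instance using more than its BE_MEMORY leaves 'memory_free'
  # untouched, because max(0, ...) clamps the difference.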

    
11726
  @staticmethod
11727
  def _ComputeInstanceData(cluster_info, i_list):
11728
    """Compute global instance data.
11729

11730
    """
11731
    instance_data = {}
11732
    for iinfo, beinfo in i_list:
11733
      nic_data = []
11734
      for nic in iinfo.nics:
11735
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11736
        nic_dict = {"mac": nic.mac,
11737
                    "ip": nic.ip,
11738
                    "mode": filled_params[constants.NIC_MODE],
11739
                    "link": filled_params[constants.NIC_LINK],
11740
                   }
11741
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11742
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11743
        nic_data.append(nic_dict)
11744
      pir = {
11745
        "tags": list(iinfo.GetTags()),
11746
        "admin_up": iinfo.admin_up,
11747
        "vcpus": beinfo[constants.BE_VCPUS],
11748
        "memory": beinfo[constants.BE_MEMORY],
11749
        "os": iinfo.os,
11750
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11751
        "nics": nic_data,
11752
        "disks": [{constants.IDISK_SIZE: dsk.size,
11753
                   constants.IDISK_MODE: dsk.mode}
11754
                  for dsk in iinfo.disks],
11755
        "disk_template": iinfo.disk_template,
11756
        "hypervisor": iinfo.hypervisor,
11757
        }
11758
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11759
                                                 pir["disks"])
11760
      instance_data[iinfo.name] = pir
11761

    
11762
    return instance_data
11763

    
11764
  def _AddNewInstance(self):
11765
    """Add new instance data to allocator structure.
11766

11767
    This in combination with _ComputeClusterData will create the
11768
    correct structure needed as input for the allocator.
11769

11770
    The checks for the completeness of the opcode must have already been
11771
    done.
11772

11773
    """
11774
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11775

    
11776
    if self.disk_template in constants.DTS_INT_MIRROR:
11777
      self.required_nodes = 2
11778
    else:
11779
      self.required_nodes = 1
11780
    request = {
11781
      "name": self.name,
11782
      "disk_template": self.disk_template,
11783
      "tags": self.tags,
11784
      "os": self.os,
11785
      "vcpus": self.vcpus,
11786
      "memory": self.mem_size,
11787
      "disks": self.disks,
11788
      "disk_space_total": disk_space,
11789
      "nics": self.nics,
11790
      "required_nodes": self.required_nodes,
11791
      }
11792
    return request
11793

    
11794
  def _AddRelocateInstance(self):
11795
    """Add relocate instance data to allocator structure.
11796

11797
    This in combination with _ComputeClusterData will create the
11798
    correct structure needed as input for the allocator.
11799

11800
    The checks for the completeness of the opcode must have already been
11801
    done.
11802

11803
    """
11804
    instance = self.cfg.GetInstanceInfo(self.name)
11805
    if instance is None:
11806
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
11807
                                   " IAllocator" % self.name)
11808

    
11809
    if instance.disk_template not in constants.DTS_MIRRORED:
11810
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11811
                                 errors.ECODE_INVAL)
11812

    
11813
    if instance.disk_template in constants.DTS_INT_MIRROR and \
11814
        len(instance.secondary_nodes) != 1:
11815
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
11816
                                 errors.ECODE_STATE)
11817

    
11818
    self.required_nodes = 1
11819
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11820
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11821

    
11822
    request = {
11823
      "name": self.name,
11824
      "disk_space_total": disk_space,
11825
      "required_nodes": self.required_nodes,
11826
      "relocate_from": self.relocate_from,
11827
      }
11828
    return request
11829

    
11830
  def _AddEvacuateNodes(self):
11831
    """Add evacuate nodes data to allocator structure.
11832

11833
    """
11834
    request = {
11835
      "evac_nodes": self.evac_nodes
11836
      }
11837
    return request
11838

    
11839
  def _BuildInputData(self, fn):
11840
    """Build input data structures.
11841

11842
    """
11843
    self._ComputeClusterData()
11844

    
11845
    request = fn()
11846
    request["type"] = self.mode
11847
    self.in_data["request"] = request
11848

    
11849
    self.in_text = serializer.Dump(self.in_data)
11850
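  # For reference, a rough (non-normative) sketch of the top-level structure
  # serialized into self.in_text by the code above:
  #
  #   {
  #     "version": ...,                # constants.IALLOCATOR_VERSION
  #     "cluster_name": ...,
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodegroups": {...},           # uuid -> {"name", "alloc_policy"}
  #     "nodes": {...},                # name -> static (+ dynamic) node data
  #     "instances": {...},            # name -> instance data
  #     "request": {..., "type": <mode>},
  #   }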

    
11851
  def Run(self, name, validate=True, call_fn=None):
11852
    """Run an instance allocator and return the results.
11853

11854
    """
11855
    if call_fn is None:
11856
      call_fn = self.rpc.call_iallocator_runner
11857

    
11858
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11859
    result.Raise("Failure while running the iallocator script")
11860

    
11861
    self.out_text = result.payload
11862
    if validate:
11863
      self._ValidateResult()
11864

    
11865
  def _ValidateResult(self):
11866
    """Process the allocator results.
11867

11868
    This will process and if successful save the result in
11869
    self.out_data and the other parameters.
11870

11871
    """
11872
    try:
11873
      rdict = serializer.Load(self.out_text)
11874
    except Exception, err:
11875
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11876

    
11877
    if not isinstance(rdict, dict):
11878
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
11879

    
11880
    # TODO: remove backwards compatibility in later versions
11881
    if "nodes" in rdict and "result" not in rdict:
11882
      rdict["result"] = rdict["nodes"]
11883
      del rdict["nodes"]
11884

    
11885
    for key in "success", "info", "result":
11886
      if key not in rdict:
11887
        raise errors.OpExecError("Can't parse iallocator results:"
11888
                                 " missing key '%s'" % key)
11889
      setattr(self, key, rdict[key])
11890

    
11891
    if not isinstance(rdict["result"], list):
11892
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11893
                               " is not a list")
11894

    
11895
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
11896
      assert self.relocate_from is not None
11897
      assert self.required_nodes == 1
11898

    
11899
      node2group = dict((name, ndata["group"])
11900
                        for (name, ndata) in self.in_data["nodes"].items())
11901

    
11902
      fn = compat.partial(self._NodesToGroups, node2group,
11903
                          self.in_data["nodegroups"])
11904

    
11905
      request_groups = fn(self.relocate_from)
11906
      result_groups = fn(rdict["result"])
11907

    
11908
      if result_groups != request_groups:
11909
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
11910
                                 " differ from original groups (%s)" %
11911
                                 (utils.CommaJoin(result_groups),
11912
                                  utils.CommaJoin(request_groups)))
11913

    
11914
    self.out_data = rdict
11915
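  # A minimal reply (as emitted by the iallocator script and parsed above) that
  # passes this validation could look like the following; the node name is
  # hypothetical and the exact payload depends on the request mode:
  #
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node1.example.com"],
  #   }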

    
11916
  @staticmethod
11917
  def _NodesToGroups(node2group, groups, nodes):
11918
    """Returns a list of unique group names for a list of nodes.
11919

11920
    @type node2group: dict
11921
    @param node2group: Map from node name to group UUID
11922
    @type groups: dict
11923
    @param groups: Group information
11924
    @type nodes: list
11925
    @param nodes: Node names
11926

11927
    """
11928
    result = set()
11929

    
11930
    for node in nodes:
11931
      try:
11932
        group_uuid = node2group[node]
11933
      except KeyError:
11934
        # Ignore unknown node
11935
        pass
11936
      else:
11937
        try:
11938
          group = groups[group_uuid]
11939
        except KeyError:
11940
          # Can't find group, let's use UUID
11941
          group_name = group_uuid
11942
        else:
11943
          group_name = group["name"]
11944

    
11945
        result.add(group_name)
11946

    
11947
    return sorted(result)
11948
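  # Example (made-up data) of what _NodesToGroups returns: unknown nodes are
  # skipped and unknown group UUIDs fall back to the UUID itself:
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-a", "node3": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  #   IAllocator._NodesToGroups(node2group, groups, ["node1", "node3", "node9"])
  #   # -> ["default", "uuid-b"]   ("node9" is ignored)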

    
11949

    
11950
class LUTestAllocator(NoHooksLU):
11951
  """Run allocator tests.
11952

11953
  This LU runs the allocator tests
11954

11955
  """
11956
  def CheckPrereq(self):
11957
    """Check prerequisites.
11958

11959
    This checks the opcode parameters depending on the test direction and mode.
11960

11961
    """
11962
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11963
      for attr in ["mem_size", "disks", "disk_template",
11964
                   "os", "tags", "nics", "vcpus"]:
11965
        if not hasattr(self.op, attr):
11966
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11967
                                     attr, errors.ECODE_INVAL)
11968
      iname = self.cfg.ExpandInstanceName(self.op.name)
11969
      if iname is not None:
11970
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11971
                                   iname, errors.ECODE_EXISTS)
11972
      if not isinstance(self.op.nics, list):
11973
        raise errors.OpPrereqError("Invalid parameter 'nics'",
11974
                                   errors.ECODE_INVAL)
11975
      if not isinstance(self.op.disks, list):
11976
        raise errors.OpPrereqError("Invalid parameter 'disks'",
11977
                                   errors.ECODE_INVAL)
11978
      for row in self.op.disks:
11979
        if (not isinstance(row, dict) or
11980
            "size" not in row or
11981
            not isinstance(row["size"], int) or
11982
            "mode" not in row or
11983
            row["mode"] not in ['r', 'w']):
11984
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
11985
                                     " parameter", errors.ECODE_INVAL)
11986
      if self.op.hypervisor is None:
11987
        self.op.hypervisor = self.cfg.GetHypervisorType()
11988
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11989
      fname = _ExpandInstanceName(self.cfg, self.op.name)
11990
      self.op.name = fname
11991
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11992
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11993
      if not hasattr(self.op, "evac_nodes"):
11994
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11995
                                   " opcode input", errors.ECODE_INVAL)
11996
    else:
11997
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11998
                                 self.op.mode, errors.ECODE_INVAL)
11999

    
12000
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12001
      if self.op.allocator is None:
12002
        raise errors.OpPrereqError("Missing allocator name",
12003
                                   errors.ECODE_INVAL)
12004
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12005
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
12006
                                 self.op.direction, errors.ECODE_INVAL)
12007

    
12008
  def Exec(self, feedback_fn):
12009
    """Run the allocator test.
12010

12011
    """
12012
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12013
      ial = IAllocator(self.cfg, self.rpc,
12014
                       mode=self.op.mode,
12015
                       name=self.op.name,
12016
                       mem_size=self.op.mem_size,
12017
                       disks=self.op.disks,
12018
                       disk_template=self.op.disk_template,
12019
                       os=self.op.os,
12020
                       tags=self.op.tags,
12021
                       nics=self.op.nics,
12022
                       vcpus=self.op.vcpus,
12023
                       hypervisor=self.op.hypervisor,
12024
                       )
12025
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12026
      ial = IAllocator(self.cfg, self.rpc,
12027
                       mode=self.op.mode,
12028
                       name=self.op.name,
12029
                       relocate_from=list(self.relocate_from),
12030
                       )
12031
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12032
      ial = IAllocator(self.cfg, self.rpc,
12033
                       mode=self.op.mode,
12034
                       evac_nodes=self.op.evac_nodes)
12035
    else:
12036
      raise errors.ProgrammerError("Uncatched mode %s in"
12037
                                   " LUTestAllocator.Exec", self.op.mode)
12038

    
12039
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
12040
      result = ial.in_text
12041
    else:
12042
      ial.Run(self.op.allocator, validate=False)
12043
      result = ial.out_text
12044
    return result
12045

    
12046

    
12047
#: Query type implementations
12048
_QUERY_IMPL = {
12049
  constants.QR_INSTANCE: _InstanceQuery,
12050
  constants.QR_NODE: _NodeQuery,
12051
  constants.QR_GROUP: _GroupQuery,
12052
  constants.QR_OS: _OsQuery,
12053
  }
12054

    
12055
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12056

    
12057

    
12058
def _GetQueryImplementation(name):
12059
  """Returns the implemtnation for a query type.
12060

12061
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
12062

12063
  """
12064
  try:
12065
    return _QUERY_IMPL[name]
12066
  except KeyError:
12067
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12068
                               errors.ECODE_INVAL)
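

# Usage sketch (illustrative only): resolving a query type to its
# implementation and instantiating it the way the query LUs above do, e.g.
#
#   impl = _GetQueryImplementation(constants.QR_GROUP)  # -> _GroupQuery
#   query_obj = impl(qlang.MakeSimpleFilter("name", names), fields, False)
#
# where "names" and "fields" stand for the requested group names and output
# fields, respectively.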