root / lib / cmdlib.py @ c6627031

History | View | Annotate | Download (421.6 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60

    
61
import ganeti.masterd.instance # pylint: disable-msg=W0611
62

    
63

    
64
def _SupportsOob(cfg, node):
65
  """Tells if node supports OOB.
66

67
  @type cfg: L{config.ConfigWriter}
68
  @param cfg: The cluster configuration
69
  @type node: L{objects.Node}
70
  @param node: The node
71
  @return: The OOB script if supported or an empty string otherwise
72

73
  """
74
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
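
# Illustrative use (hypothetical variables and message): callers generally rely
# on the truthiness of the returned ND_OOB_PROGRAM value, e.g.
#   oob_program = _SupportsOob(self.cfg, node_object)
#   if not oob_program:
#     raise errors.OpPrereqError("OOB is not supported for node %s" %
#                                node_object.name, errors.ECODE_STATE)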
75

    
76

    
77
class ResultWithJobs:
78
  """Data container for LU results with jobs.
79

80
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
81
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
82
  contained in the C{jobs} attribute and include the job IDs in the opcode
83
  result.
84

85
  """
86
  def __init__(self, jobs, **kwargs):
87
    """Initializes this class.
88

89
    Additional return values can be specified as keyword arguments.
90

91
    @type jobs: list of lists of L{opcode.OpCode}
92
    @param jobs: A list of lists of opcode objects
93

94
    """
95
    self.jobs = jobs
96
    self.other = kwargs
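
# Illustrative sketch (hypothetical opcode and field values): an LU can queue
# follow-up jobs by returning something like
#   ResultWithJobs([[opcodes.OpInstanceStartup(instance_name="inst1.example.com")]],
#                  allocated_node="node1.example.com")
# Each inner list is submitted as one job; mcpu.Processor._ProcessResult adds
# the resulting job IDs to the opcode result, together with the additional
# return values stored in C{other}.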
97

    
98

    
99
class LogicalUnit(object):
100
  """Logical Unit base class.
101

102
  Subclasses must follow these rules:
103
    - implement ExpandNames
104
    - implement CheckPrereq (except when tasklets are used)
105
    - implement Exec (except when tasklets are used)
106
    - implement BuildHooksEnv
107
    - implement BuildHooksNodes
108
    - redefine HPATH and HTYPE
109
    - optionally redefine their run requirements:
110
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
111

112
  Note that all commands require root permissions.
113

114
  @ivar dry_run_result: the value (if any) that will be returned to the caller
115
      in dry-run mode (signalled by opcode dry_run parameter)
116

117
  """
118
  HPATH = None
119
  HTYPE = None
120
  REQ_BGL = True
121

    
122
  def __init__(self, processor, op, context, rpc):
123
    """Constructor for LogicalUnit.
124

125
    This needs to be overridden in derived classes in order to check op
126
    validity.
127

128
    """
129
    self.proc = processor
130
    self.op = op
131
    self.cfg = context.cfg
132
    self.context = context
133
    self.rpc = rpc
134
    # Dicts used to declare locking needs to mcpu
135
    self.needed_locks = None
136
    self.acquired_locks = {}
137
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
138
    self.add_locks = {}
139
    self.remove_locks = {}
140
    # Used to force good behavior when calling helper functions
141
    self.recalculate_locks = {}
142
    self.__ssh = None
143
    # logging
144
    self.Log = processor.Log # pylint: disable-msg=C0103
145
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148
    # support for dry-run
149
    self.dry_run_result = None
150
    # support for generic debug attribute
151
    if (not hasattr(self.op, "debug_level") or
152
        not isinstance(self.op.debug_level, int)):
153
      self.op.debug_level = 0
154

    
155
    # Tasklets
156
    self.tasklets = None
157

    
158
    # Validate opcode parameters and set defaults
159
    self.op.Validate(True)
160

    
161
    self.CheckArguments()
162

    
163
  def __GetSSH(self):
164
    """Returns the SshRunner object
165

166
    """
167
    if not self.__ssh:
168
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
169
    return self.__ssh
170

    
171
  ssh = property(fget=__GetSSH)
172

    
173
  def CheckArguments(self):
174
    """Check syntactic validity for the opcode arguments.
175

176
    This method is for doing a simple syntactic check and ensuring
177
    validity of opcode parameters, without any cluster-related
178
    checks. While the same can be accomplished in ExpandNames and/or
179
    CheckPrereq, doing these separately is better because:
180

181
      - ExpandNames is left as purely a lock-related function
182
      - CheckPrereq is run after we have acquired locks (and possibly
183
        waited for them)
184

185
    The function is allowed to change the self.op attribute so that
186
    later methods can no longer worry about missing parameters.
187

188
    """
189
    pass
190

    
191
  def ExpandNames(self):
192
    """Expand names for this LU.
193

194
    This method is called before starting to execute the opcode, and it should
195
    update all the parameters of the opcode to their canonical form (e.g. a
196
    short node name must be fully expanded after this method has successfully
197
    completed). This way locking, hooks, logging, etc. can work correctly.
198

199
    LUs which implement this method must also populate the self.needed_locks
200
    member, as a dict with lock levels as keys, and a list of needed lock names
201
    as values. Rules:
202

203
      - use an empty dict if you don't need any lock
204
      - if you don't need any lock at a particular level omit that level
205
      - don't put anything for the BGL level
206
      - if you want all locks at a level use locking.ALL_SET as a value
207

208
    If you need to share locks (rather than acquire them exclusively) at one
209
    level you can modify self.share_locks, setting a true value (usually 1) for
210
    that level. By default locks are not shared.
211

212
    This function can also define a list of tasklets, which then will be
213
    executed in order instead of the usual LU-level CheckPrereq and Exec
214
    functions, if those are not defined by the LU.
215

216
    Examples::
217

218
      # Acquire all nodes and one instance
219
      self.needed_locks = {
220
        locking.LEVEL_NODE: locking.ALL_SET,
221
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
222
      }
223
      # Acquire just two nodes
224
      self.needed_locks = {
225
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
226
      }
227
      # Acquire no locks
228
      self.needed_locks = {} # No, you can't leave it to the default value None
229

230
    """
231
    # The implementation of this method is mandatory only if the new LU is
232
    # concurrent, so that old LUs don't need to be changed all at the same
233
    # time.
234
    if self.REQ_BGL:
235
      self.needed_locks = {} # Exclusive LUs don't need locks.
236
    else:
237
      raise NotImplementedError
238

    
239
  def DeclareLocks(self, level):
240
    """Declare LU locking needs for a level
241

242
    While most LUs can just declare their locking needs at ExpandNames time,
243
    sometimes there's the need to calculate some locks after having acquired
244
    the ones before. This function is called just before acquiring locks at a
245
    particular level, but after acquiring the ones at lower levels, and permits
246
    such calculations. It can be used to modify self.needed_locks, and by
247
    default it does nothing.
248

249
    This function is only called if you have something already set in
250
    self.needed_locks for the level.
251

252
    @param level: Locking level which is going to be locked
253
    @type level: member of ganeti.locking.LEVELS
254

255
    """
256

    
257
  def CheckPrereq(self):
258
    """Check prerequisites for this LU.
259

260
    This method should check that the prerequisites for the execution
261
    of this LU are fulfilled. It can do internode communication, but
262
    it should be idempotent - no cluster or system changes are
263
    allowed.
264

265
    The method should raise errors.OpPrereqError in case something is
266
    not fulfilled. Its return value is ignored.
267

268
    This method should also update all the parameters of the opcode to
269
    their canonical form if it hasn't been done by ExpandNames before.
270

271
    """
272
    if self.tasklets is not None:
273
      for (idx, tl) in enumerate(self.tasklets):
274
        logging.debug("Checking prerequisites for tasklet %s/%s",
275
                      idx + 1, len(self.tasklets))
276
        tl.CheckPrereq()
277
    else:
278
      pass
279

    
280
  def Exec(self, feedback_fn):
281
    """Execute the LU.
282

283
    This method should implement the actual work. It should raise
284
    errors.OpExecError for failures that are somewhat dealt with in
285
    code, or expected.
286

287
    """
288
    if self.tasklets is not None:
289
      for (idx, tl) in enumerate(self.tasklets):
290
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
291
        tl.Exec(feedback_fn)
292
    else:
293
      raise NotImplementedError
294

    
295
  def BuildHooksEnv(self):
296
    """Build hooks environment for this LU.
297

298
    @rtype: dict
299
    @return: Dictionary containing the environment that will be used for
300
      running the hooks for this LU. The keys of the dict must not be prefixed
301
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
302
      will extend the environment with additional variables. If no environment
303
      should be defined, an empty dictionary should be returned (not C{None}).
304
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
305
      will not be called.
306

307
    """
308
    raise NotImplementedError
309

    
310
  def BuildHooksNodes(self):
311
    """Build list of nodes to run LU's hooks.
312

313
    @rtype: tuple; (list, list)
314
    @return: Tuple containing a list of node names on which the hook
315
      should run before the execution and a list of node names on which the
316
      hook should run after the execution. No nodes should be returned as an
317
      empty list (and not None).
318
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
319
      will not be called.
320

321
    """
322
    raise NotImplementedError
323

    
324
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
325
    """Notify the LU about the results of its hooks.
326

327
    This method is called every time a hooks phase is executed, and notifies
328
    the Logical Unit about the hooks' result. The LU can then use it to alter
329
    its result based on the hooks.  By default the method does nothing and the
330
    previous result is passed back unchanged but any LU can define it if it
331
    wants to use the local cluster hook-scripts somehow.
332

333
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
334
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
335
    @param hook_results: the results of the multi-node hooks rpc call
336
    @param feedback_fn: function used to send feedback back to the caller
337
    @param lu_result: the previous Exec result this LU had, or None
338
        in the PRE phase
339
    @return: the new Exec result, based on the previous result
340
        and hook results
341

342
    """
343
    # API must be kept, thus we ignore the unused argument and the 'could
344
    # be a function' warnings
345
    # pylint: disable-msg=W0613,R0201
346
    return lu_result
347

    
348
  def _ExpandAndLockInstance(self):
349
    """Helper function to expand and lock an instance.
350

351
    Many LUs that work on an instance take its name in self.op.instance_name
352
    and need to expand it and then declare the expanded name for locking. This
353
    function does it, and then updates self.op.instance_name to the expanded
354
    name. It also initializes needed_locks as a dict, if this hasn't been done
355
    before.
356

357
    """
358
    if self.needed_locks is None:
359
      self.needed_locks = {}
360
    else:
361
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
362
        "_ExpandAndLockInstance called with instance-level locks set"
363
    self.op.instance_name = _ExpandInstanceName(self.cfg,
364
                                                self.op.instance_name)
365
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
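
  # Typical ExpandNames sketch for a subclass (assumed pattern, mirroring the
  # _LockInstancesNodes helper below):
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE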
366

    
367
  def _LockInstancesNodes(self, primary_only=False):
368
    """Helper function to declare instances' nodes for locking.
369

370
    This function should be called after locking one or more instances to lock
371
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
372
    with all primary or secondary nodes for instances already locked and
373
    present in self.needed_locks[locking.LEVEL_INSTANCE].
374

375
    It should be called from DeclareLocks, and for safety only works if
376
    self.recalculate_locks[locking.LEVEL_NODE] is set.
377

378
    In the future it may grow parameters to just lock some instance's nodes, or
379
    to just lock primaries or secondary nodes, if needed.
380

381
    It should be called in DeclareLocks in a way similar to::
382

383
      if level == locking.LEVEL_NODE:
384
        self._LockInstancesNodes()
385

386
    @type primary_only: boolean
387
    @param primary_only: only lock primary nodes of locked instances
388

389
    """
390
    assert locking.LEVEL_NODE in self.recalculate_locks, \
391
      "_LockInstancesNodes helper function called with no nodes to recalculate"
392

    
393
    # TODO: check if we've really been called with the instance locks held
394

    
395
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
396
    # future we might want to have different behaviors depending on the value
397
    # of self.recalculate_locks[locking.LEVEL_NODE]
398
    wanted_nodes = []
399
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
400
      instance = self.context.cfg.GetInstanceInfo(instance_name)
401
      wanted_nodes.append(instance.primary_node)
402
      if not primary_only:
403
        wanted_nodes.extend(instance.secondary_nodes)
404

    
405
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
406
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
407
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
408
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
409

    
410
    del self.recalculate_locks[locking.LEVEL_NODE]
411

    
412

    
413
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
414
  """Simple LU which runs no hooks.
415

416
  This LU is intended as a parent for other LogicalUnits which will
417
  run no hooks, in order to reduce duplicate code.
418

419
  """
420
  HPATH = None
421
  HTYPE = None
422

    
423
  def BuildHooksEnv(self):
424
    """Empty BuildHooksEnv for NoHooksLu.
425

426
    This just raises an error.
427

428
    """
429
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
430

    
431
  def BuildHooksNodes(self):
432
    """Empty BuildHooksNodes for NoHooksLU.
433

434
    """
435
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
436

    
437

    
438
class Tasklet:
439
  """Tasklet base class.
440

441
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
442
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
443
  tasklets know nothing about locks.
444

445
  Subclasses must follow these rules:
446
    - Implement CheckPrereq
447
    - Implement Exec
448

449
  """
450
  def __init__(self, lu):
451
    self.lu = lu
452

    
453
    # Shortcuts
454
    self.cfg = lu.cfg
455
    self.rpc = lu.rpc
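
  # Minimal subclass sketch (hypothetical names), showing the expected shape:
  #   class _ExampleTasklet(Tasklet):
  #     def CheckPrereq(self):
  #       pass  # e.g. verify the target object still exists in self.cfg
  #     def Exec(self, feedback_fn):
  #       feedback_fn("doing the actual work")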
456

    
457
  def CheckPrereq(self):
458
    """Check prerequisites for this tasklets.
459

460
    This method should check whether the prerequisites for the execution of
461
    this tasklet are fulfilled. It can do internode communication, but it
462
    should be idempotent - no cluster or system changes are allowed.
463

464
    The method should raise errors.OpPrereqError in case something is not
465
    fulfilled. Its return value is ignored.
466

467
    This method should also update all parameters to their canonical form if it
468
    hasn't been done before.
469

470
    """
471
    pass
472

    
473
  def Exec(self, feedback_fn):
474
    """Execute the tasklet.
475

476
    This method should implement the actual work. It should raise
477
    errors.OpExecError for failures that are somewhat dealt with in code, or
478
    expected.
479

480
    """
481
    raise NotImplementedError
482

    
483

    
484
class _QueryBase:
485
  """Base for query utility classes.
486

487
  """
488
  #: Attribute holding field definitions
489
  FIELDS = None
490

    
491
  def __init__(self, filter_, fields, use_locking):
492
    """Initializes this class.
493

494
    """
495
    self.use_locking = use_locking
496

    
497
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
498
                             namefield="name")
499
    self.requested_data = self.query.RequestedData()
500
    self.names = self.query.RequestedNames()
501

    
502
    # Sort only if no names were requested
503
    self.sort_by_name = not self.names
504

    
505
    self.do_locking = None
506
    self.wanted = None
507

    
508
  def _GetNames(self, lu, all_names, lock_level):
509
    """Helper function to determine names asked for in the query.
510

511
    """
512
    if self.do_locking:
513
      names = lu.acquired_locks[lock_level]
514
    else:
515
      names = all_names
516

    
517
    if self.wanted == locking.ALL_SET:
518
      assert not self.names
519
      # caller didn't specify names, so ordering is not important
520
      return utils.NiceSort(names)
521

    
522
    # caller specified names and we must keep the same order
523
    assert self.names
524
    assert not self.do_locking or lu.acquired_locks[lock_level]
525

    
526
    missing = set(self.wanted).difference(names)
527
    if missing:
528
      raise errors.OpExecError("Some items were removed before retrieving"
529
                               " their data: %s" % missing)
530

    
531
    # Return expanded names
532
    return self.wanted
533

    
534
  def ExpandNames(self, lu):
535
    """Expand names for this query.
536

537
    See L{LogicalUnit.ExpandNames}.
538

539
    """
540
    raise NotImplementedError()
541

    
542
  def DeclareLocks(self, lu, level):
543
    """Declare locks for this query.
544

545
    See L{LogicalUnit.DeclareLocks}.
546

547
    """
548
    raise NotImplementedError()
549

    
550
  def _GetQueryData(self, lu):
551
    """Collects all data for this query.
552

553
    @return: Query data object
554

555
    """
556
    raise NotImplementedError()
557

    
558
  def NewStyleQuery(self, lu):
559
    """Collect data and execute query.
560

561
    """
562
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
563
                                  sort_by_name=self.sort_by_name)
564

    
565
  def OldStyleQuery(self, lu):
566
    """Collect data and execute query.
567

568
    """
569
    return self.query.OldStyleQuery(self._GetQueryData(lu),
570
                                    sort_by_name=self.sort_by_name)
571

    
572

    
573
def _GetWantedNodes(lu, nodes):
574
  """Returns list of checked and expanded node names.
575

576
  @type lu: L{LogicalUnit}
577
  @param lu: the logical unit on whose behalf we execute
578
  @type nodes: list
579
  @param nodes: list of node names or None for all nodes
580
  @rtype: list
581
  @return: the list of nodes, sorted
582
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
583

584
  """
585
  if nodes:
586
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
587

    
588
  return utils.NiceSort(lu.cfg.GetNodeList())
589

    
590

    
591
def _GetWantedInstances(lu, instances):
592
  """Returns list of checked and expanded instance names.
593

594
  @type lu: L{LogicalUnit}
595
  @param lu: the logical unit on whose behalf we execute
596
  @type instances: list
597
  @param instances: list of instance names or None for all instances
598
  @rtype: list
599
  @return: the list of instances, sorted
600
  @raise errors.OpPrereqError: if the instances parameter is wrong type
601
  @raise errors.OpPrereqError: if any of the passed instances is not found
602

603
  """
604
  if instances:
605
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
606
  else:
607
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
608
  return wanted
609

    
610

    
611
def _GetUpdatedParams(old_params, update_dict,
612
                      use_default=True, use_none=False):
613
  """Return the new version of a parameter dictionary.
614

615
  @type old_params: dict
616
  @param old_params: old parameters
617
  @type update_dict: dict
618
  @param update_dict: dict containing new parameter values, or
619
      constants.VALUE_DEFAULT to reset the parameter to its default
620
      value
621
  @type use_default: boolean
622
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
623
      values as 'to be deleted' values
624
  @type use_none: boolean
625
  @param use_none: whether to recognise C{None} values as 'to be
626
      deleted' values
627
  @rtype: dict
628
  @return: the new parameter dictionary
629

630
  """
631
  params_copy = copy.deepcopy(old_params)
632
  for key, val in update_dict.iteritems():
633
    if ((use_default and val == constants.VALUE_DEFAULT) or
634
        (use_none and val is None)):
635
      try:
636
        del params_copy[key]
637
      except KeyError:
638
        pass
639
    else:
640
      params_copy[key] = val
641
  return params_copy
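
# Worked example (hypothetical parameter names): with use_default=True,
#   _GetUpdatedParams({"acpi": True, "kernel_path": "/boot/vmlinuz"},
#                     {"acpi": constants.VALUE_DEFAULT, "root_path": "/dev/vda1"})
# returns {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}: the
# VALUE_DEFAULT entry is deleted from the copy and the new key is added, while
# the original dictionary is left untouched (deepcopy).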
642

    
643

    
644
def _RunPostHook(lu, node_name):
645
  """Runs the post-hook for an opcode on a single node.
646

647
  """
648
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
649
  try:
650
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
651
  except:
652
    # pylint: disable-msg=W0702
653
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
654

    
655

    
656
def _CheckOutputFields(static, dynamic, selected):
657
  """Checks whether all selected fields are valid.
658

659
  @type static: L{utils.FieldSet}
660
  @param static: static fields set
661
  @type dynamic: L{utils.FieldSet}
662
  @param dynamic: dynamic fields set
663

664
  """
665
  f = utils.FieldSet()
666
  f.Extend(static)
667
  f.Extend(dynamic)
668

    
669
  delta = f.NonMatching(selected)
670
  if delta:
671
    raise errors.OpPrereqError("Unknown output fields selected: %s"
672
                               % ",".join(delta), errors.ECODE_INVAL)
673

    
674

    
675
def _CheckGlobalHvParams(params):
676
  """Validates that given hypervisor params are not global ones.
677

678
  This will ensure that instances don't get customised versions of
679
  global params.
680

681
  """
682
  used_globals = constants.HVC_GLOBALS.intersection(params)
683
  if used_globals:
684
    msg = ("The following hypervisor parameters are global and cannot"
685
           " be customized at instance level, please modify them at"
686
           " cluster level: %s" % utils.CommaJoin(used_globals))
687
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
688

    
689

    
690
def _CheckNodeOnline(lu, node, msg=None):
691
  """Ensure that a given node is online.
692

693
  @param lu: the LU on behalf of which we make the check
694
  @param node: the node to check
695
  @param msg: if passed, should be a message to replace the default one
696
  @raise errors.OpPrereqError: if the node is offline
697

698
  """
699
  if msg is None:
700
    msg = "Can't use offline node"
701
  if lu.cfg.GetNodeInfo(node).offline:
702
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
703

    
704

    
705
def _CheckNodeNotDrained(lu, node):
706
  """Ensure that a given node is not drained.
707

708
  @param lu: the LU on behalf of which we make the check
709
  @param node: the node to check
710
  @raise errors.OpPrereqError: if the node is drained
711

712
  """
713
  if lu.cfg.GetNodeInfo(node).drained:
714
    raise errors.OpPrereqError("Can't use drained node %s" % node,
715
                               errors.ECODE_STATE)
716

    
717

    
718
def _CheckNodeVmCapable(lu, node):
719
  """Ensure that a given node is vm capable.
720

721
  @param lu: the LU on behalf of which we make the check
722
  @param node: the node to check
723
  @raise errors.OpPrereqError: if the node is not vm capable
724

725
  """
726
  if not lu.cfg.GetNodeInfo(node).vm_capable:
727
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
728
                               errors.ECODE_STATE)
729

    
730

    
731
def _CheckNodeHasOS(lu, node, os_name, force_variant):
732
  """Ensure that a node supports a given OS.
733

734
  @param lu: the LU on behalf of which we make the check
735
  @param node: the node to check
736
  @param os_name: the OS to query about
737
  @param force_variant: whether to ignore variant errors
738
  @raise errors.OpPrereqError: if the node is not supporting the OS
739

740
  """
741
  result = lu.rpc.call_os_get(node, os_name)
742
  result.Raise("OS '%s' not in supported OS list for node %s" %
743
               (os_name, node),
744
               prereq=True, ecode=errors.ECODE_INVAL)
745
  if not force_variant:
746
    _CheckOSVariant(result.payload, os_name)
747

    
748

    
749
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
750
  """Ensure that a node has the given secondary ip.
751

752
  @type lu: L{LogicalUnit}
753
  @param lu: the LU on behalf of which we make the check
754
  @type node: string
755
  @param node: the node to check
756
  @type secondary_ip: string
757
  @param secondary_ip: the ip to check
758
  @type prereq: boolean
759
  @param prereq: whether to throw a prerequisite or an execute error
760
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
761
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
762

763
  """
764
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
765
  result.Raise("Failure checking secondary ip on node %s" % node,
766
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
767
  if not result.payload:
768
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
769
           " please fix and re-run this command" % secondary_ip)
770
    if prereq:
771
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
772
    else:
773
      raise errors.OpExecError(msg)
774

    
775

    
776
def _GetClusterDomainSecret():
777
  """Reads the cluster domain secret.
778

779
  """
780
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
781
                               strict=True)
782

    
783

    
784
def _CheckInstanceDown(lu, instance, reason):
785
  """Ensure that an instance is not running."""
786
  if instance.admin_up:
787
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
788
                               (instance.name, reason), errors.ECODE_STATE)
789

    
790
  pnode = instance.primary_node
791
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
792
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
793
              prereq=True, ecode=errors.ECODE_ENVIRON)
794

    
795
  if instance.name in ins_l.payload:
796
    raise errors.OpPrereqError("Instance %s is running, %s" %
797
                               (instance.name, reason), errors.ECODE_STATE)
798

    
799

    
800
def _ExpandItemName(fn, name, kind):
801
  """Expand an item name.
802

803
  @param fn: the function to use for expansion
804
  @param name: requested item name
805
  @param kind: text description ('Node' or 'Instance')
806
  @return: the resolved (full) name
807
  @raise errors.OpPrereqError: if the item is not found
808

809
  """
810
  full_name = fn(name)
811
  if full_name is None:
812
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
813
                               errors.ECODE_NOENT)
814
  return full_name
815

    
816

    
817
def _ExpandNodeName(cfg, name):
818
  """Wrapper over L{_ExpandItemName} for nodes."""
819
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
820

    
821

    
822
def _ExpandInstanceName(cfg, name):
823
  """Wrapper over L{_ExpandItemName} for instance."""
824
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
825

    
826

    
827
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
828
                          memory, vcpus, nics, disk_template, disks,
829
                          bep, hvp, hypervisor_name):
830
  """Builds instance related env variables for hooks
831

832
  This builds the hook environment from individual variables.
833

834
  @type name: string
835
  @param name: the name of the instance
836
  @type primary_node: string
837
  @param primary_node: the name of the instance's primary node
838
  @type secondary_nodes: list
839
  @param secondary_nodes: list of secondary nodes as strings
840
  @type os_type: string
841
  @param os_type: the name of the instance's OS
842
  @type status: boolean
843
  @param status: the should_run status of the instance
844
  @type memory: string
845
  @param memory: the memory size of the instance
846
  @type vcpus: string
847
  @param vcpus: the count of VCPUs the instance has
848
  @type nics: list
849
  @param nics: list of tuples (ip, mac, mode, link) representing
850
      the NICs the instance has
851
  @type disk_template: string
852
  @param disk_template: the disk template of the instance
853
  @type disks: list
854
  @param disks: the list of (size, mode) pairs
855
  @type bep: dict
856
  @param bep: the backend parameters for the instance
857
  @type hvp: dict
858
  @param hvp: the hypervisor parameters for the instance
859
  @type hypervisor_name: string
860
  @param hypervisor_name: the hypervisor for the instance
861
  @rtype: dict
862
  @return: the hook environment for this instance
863

864
  """
865
  if status:
866
    str_status = "up"
867
  else:
868
    str_status = "down"
869
  env = {
870
    "OP_TARGET": name,
871
    "INSTANCE_NAME": name,
872
    "INSTANCE_PRIMARY": primary_node,
873
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
874
    "INSTANCE_OS_TYPE": os_type,
875
    "INSTANCE_STATUS": str_status,
876
    "INSTANCE_MEMORY": memory,
877
    "INSTANCE_VCPUS": vcpus,
878
    "INSTANCE_DISK_TEMPLATE": disk_template,
879
    "INSTANCE_HYPERVISOR": hypervisor_name,
880
  }
881

    
882
  if nics:
883
    nic_count = len(nics)
884
    for idx, (ip, mac, mode, link) in enumerate(nics):
885
      if ip is None:
886
        ip = ""
887
      env["INSTANCE_NIC%d_IP" % idx] = ip
888
      env["INSTANCE_NIC%d_MAC" % idx] = mac
889
      env["INSTANCE_NIC%d_MODE" % idx] = mode
890
      env["INSTANCE_NIC%d_LINK" % idx] = link
891
      if mode == constants.NIC_MODE_BRIDGED:
892
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
893
  else:
894
    nic_count = 0
895

    
896
  env["INSTANCE_NIC_COUNT"] = nic_count
897

    
898
  if disks:
899
    disk_count = len(disks)
900
    for idx, (size, mode) in enumerate(disks):
901
      env["INSTANCE_DISK%d_SIZE" % idx] = size
902
      env["INSTANCE_DISK%d_MODE" % idx] = mode
903
  else:
904
    disk_count = 0
905

    
906
  env["INSTANCE_DISK_COUNT"] = disk_count
907

    
908
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
909
    for key, value in source.items():
910
      env["INSTANCE_%s_%s" % (kind, key)] = value
911

    
912
  return env
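
# Illustrative result (hypothetical values): for a single-NIC, single-disk
# instance the returned dict contains, among others, OP_TARGET, INSTANCE_NAME,
# INSTANCE_PRIMARY, INSTANCE_STATUS ("up"/"down"), INSTANCE_NIC_COUNT=1 with
# INSTANCE_NIC0_IP/_MAC/_MODE/_LINK, INSTANCE_DISK_COUNT=1 with
# INSTANCE_DISK0_SIZE/_MODE, and one INSTANCE_BE_*/INSTANCE_HV_* entry per
# backend/hypervisor parameter.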
913

    
914

    
915
def _NICListToTuple(lu, nics):
916
  """Build a list of nic information tuples.
917

918
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
919
  value in LUInstanceQueryData.
920

921
  @type lu:  L{LogicalUnit}
922
  @param lu: the logical unit on whose behalf we execute
923
  @type nics: list of L{objects.NIC}
924
  @param nics: list of nics to convert to hooks tuples
925

926
  """
927
  hooks_nics = []
928
  cluster = lu.cfg.GetClusterInfo()
929
  for nic in nics:
930
    ip = nic.ip
931
    mac = nic.mac
932
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
933
    mode = filled_params[constants.NIC_MODE]
934
    link = filled_params[constants.NIC_LINK]
935
    hooks_nics.append((ip, mac, mode, link))
936
  return hooks_nics
937

    
938

    
939
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
940
  """Builds instance related env variables for hooks from an object.
941

942
  @type lu: L{LogicalUnit}
943
  @param lu: the logical unit on whose behalf we execute
944
  @type instance: L{objects.Instance}
945
  @param instance: the instance for which we should build the
946
      environment
947
  @type override: dict
948
  @param override: dictionary with key/values that will override
949
      our values
950
  @rtype: dict
951
  @return: the hook environment dictionary
952

953
  """
954
  cluster = lu.cfg.GetClusterInfo()
955
  bep = cluster.FillBE(instance)
956
  hvp = cluster.FillHV(instance)
957
  args = {
958
    'name': instance.name,
959
    'primary_node': instance.primary_node,
960
    'secondary_nodes': instance.secondary_nodes,
961
    'os_type': instance.os,
962
    'status': instance.admin_up,
963
    'memory': bep[constants.BE_MEMORY],
964
    'vcpus': bep[constants.BE_VCPUS],
965
    'nics': _NICListToTuple(lu, instance.nics),
966
    'disk_template': instance.disk_template,
967
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
968
    'bep': bep,
969
    'hvp': hvp,
970
    'hypervisor_name': instance.hypervisor,
971
  }
972
  if override:
973
    args.update(override)
974
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
975

    
976

    
977
def _AdjustCandidatePool(lu, exceptions):
978
  """Adjust the candidate pool after node operations.
979

980
  """
981
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
982
  if mod_list:
983
    lu.LogInfo("Promoted nodes to master candidate role: %s",
984
               utils.CommaJoin(node.name for node in mod_list))
985
    for name in mod_list:
986
      lu.context.ReaddNode(name)
987
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
988
  if mc_now > mc_max:
989
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
990
               (mc_now, mc_max))
991

    
992

    
993
def _DecideSelfPromotion(lu, exceptions=None):
994
  """Decide whether I should promote myself as a master candidate.
995

996
  """
997
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
998
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
999
  # the new node will increase mc_max by one, so:
1000
  mc_should = min(mc_should + 1, cp_size)
1001
  return mc_now < mc_should
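
# Worked example (hypothetical numbers): with candidate_pool_size=10, 9 current
# candidates and a desired count of 9, the new node raises the target to
# min(9 + 1, 10) = 10, so 9 < 10 and the function returns True (promote).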
1002

    
1003

    
1004
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1005
  """Check that the brigdes needed by a list of nics exist.
1006

1007
  """
1008
  cluster = lu.cfg.GetClusterInfo()
1009
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1010
  brlist = [params[constants.NIC_LINK] for params in paramslist
1011
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1012
  if brlist:
1013
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1014
    result.Raise("Error checking bridges on destination node '%s'" %
1015
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1016

    
1017

    
1018
def _CheckInstanceBridgesExist(lu, instance, node=None):
1019
  """Check that the brigdes needed by an instance exist.
1020

1021
  """
1022
  if node is None:
1023
    node = instance.primary_node
1024
  _CheckNicsBridgesExist(lu, instance.nics, node)
1025

    
1026

    
1027
def _CheckOSVariant(os_obj, name):
1028
  """Check whether an OS name conforms to the os variants specification.
1029

1030
  @type os_obj: L{objects.OS}
1031
  @param os_obj: OS object to check
1032
  @type name: string
1033
  @param name: OS name passed by the user, to check for validity
1034

1035
  """
1036
  if not os_obj.supported_variants:
1037
    return
1038
  variant = objects.OS.GetVariant(name)
1039
  if not variant:
1040
    raise errors.OpPrereqError("OS name must include a variant",
1041
                               errors.ECODE_INVAL)
1042

    
1043
  if variant not in os_obj.supported_variants:
1044
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1045

    
1046

    
1047
def _GetNodeInstancesInner(cfg, fn):
1048
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1049

    
1050

    
1051
def _GetNodeInstances(cfg, node_name):
1052
  """Returns a list of all primary and secondary instances on a node.
1053

1054
  """
1055

    
1056
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1057

    
1058

    
1059
def _GetNodePrimaryInstances(cfg, node_name):
1060
  """Returns primary instances on a node.
1061

1062
  """
1063
  return _GetNodeInstancesInner(cfg,
1064
                                lambda inst: node_name == inst.primary_node)
1065

    
1066

    
1067
def _GetNodeSecondaryInstances(cfg, node_name):
1068
  """Returns secondary instances on a node.
1069

1070
  """
1071
  return _GetNodeInstancesInner(cfg,
1072
                                lambda inst: node_name in inst.secondary_nodes)
1073

    
1074

    
1075
def _GetStorageTypeArgs(cfg, storage_type):
1076
  """Returns the arguments for a storage type.
1077

1078
  """
1079
  # Special case for file storage
1080
  if storage_type == constants.ST_FILE:
1081
    # storage.FileStorage wants a list of storage directories
1082
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1083

    
1084
  return []
1085

    
1086

    
1087
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1088
  faulty = []
1089

    
1090
  for dev in instance.disks:
1091
    cfg.SetDiskID(dev, node_name)
1092

    
1093
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1094
  result.Raise("Failed to get disk status from node %s" % node_name,
1095
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1096

    
1097
  for idx, bdev_status in enumerate(result.payload):
1098
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1099
      faulty.append(idx)
1100

    
1101
  return faulty
1102

    
1103

    
1104
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1105
  """Check the sanity of iallocator and node arguments and use the
1106
  cluster-wide iallocator if appropriate.
1107

1108
  Check that at most one of (iallocator, node) is specified. If none is
1109
  specified, then the LU's opcode's iallocator slot is filled with the
1110
  cluster-wide default iallocator.
1111

1112
  @type iallocator_slot: string
1113
  @param iallocator_slot: the name of the opcode iallocator slot
1114
  @type node_slot: string
1115
  @param node_slot: the name of the opcode target node slot
1116

1117
  """
1118
  node = getattr(lu.op, node_slot, None)
1119
  iallocator = getattr(lu.op, iallocator_slot, None)
1120

    
1121
  if node is not None and iallocator is not None:
1122
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1123
                               errors.ECODE_INVAL)
1124
  elif node is None and iallocator is None:
1125
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1126
    if default_iallocator:
1127
      setattr(lu.op, iallocator_slot, default_iallocator)
1128
    else:
1129
      raise errors.OpPrereqError("No iallocator or node given and no"
1130
                                 " cluster-wide default iallocator found."
1131
                                 " Please specify either an iallocator or a"
1132
                                 " node, or set a cluster-wide default"
1133
                                 " iallocator.")
1134

    
1135

    
1136
class LUClusterPostInit(LogicalUnit):
1137
  """Logical unit for running hooks after cluster initialization.
1138

1139
  """
1140
  HPATH = "cluster-init"
1141
  HTYPE = constants.HTYPE_CLUSTER
1142

    
1143
  def BuildHooksEnv(self):
1144
    """Build hooks env.
1145

1146
    """
1147
    return {
1148
      "OP_TARGET": self.cfg.GetClusterName(),
1149
      }
1150

    
1151
  def BuildHooksNodes(self):
1152
    """Build hooks nodes.
1153

1154
    """
1155
    return ([], [self.cfg.GetMasterNode()])
1156

    
1157
  def Exec(self, feedback_fn):
1158
    """Nothing to do.
1159

1160
    """
1161
    return True
1162

    
1163

    
1164
class LUClusterDestroy(LogicalUnit):
1165
  """Logical unit for destroying the cluster.
1166

1167
  """
1168
  HPATH = "cluster-destroy"
1169
  HTYPE = constants.HTYPE_CLUSTER
1170

    
1171
  def BuildHooksEnv(self):
1172
    """Build hooks env.
1173

1174
    """
1175
    return {
1176
      "OP_TARGET": self.cfg.GetClusterName(),
1177
      }
1178

    
1179
  def BuildHooksNodes(self):
1180
    """Build hooks nodes.
1181

1182
    """
1183
    return ([], [])
1184

    
1185
  def CheckPrereq(self):
1186
    """Check prerequisites.
1187

1188
    This checks whether the cluster is empty.
1189

1190
    Any errors are signaled by raising errors.OpPrereqError.
1191

1192
    """
1193
    master = self.cfg.GetMasterNode()
1194

    
1195
    nodelist = self.cfg.GetNodeList()
1196
    if len(nodelist) != 1 or nodelist[0] != master:
1197
      raise errors.OpPrereqError("There are still %d node(s) in"
1198
                                 " this cluster." % (len(nodelist) - 1),
1199
                                 errors.ECODE_INVAL)
1200
    instancelist = self.cfg.GetInstanceList()
1201
    if instancelist:
1202
      raise errors.OpPrereqError("There are still %d instance(s) in"
1203
                                 " this cluster." % len(instancelist),
1204
                                 errors.ECODE_INVAL)
1205

    
1206
  def Exec(self, feedback_fn):
1207
    """Destroys the cluster.
1208

1209
    """
1210
    master = self.cfg.GetMasterNode()
1211

    
1212
    # Run post hooks on master node before it's removed
1213
    _RunPostHook(self, master)
1214

    
1215
    result = self.rpc.call_node_stop_master(master, False)
1216
    result.Raise("Could not disable the master role")
1217

    
1218
    return master
1219

    
1220

    
1221
def _VerifyCertificate(filename):
1222
  """Verifies a certificate for LUClusterVerify.
1223

1224
  @type filename: string
1225
  @param filename: Path to PEM file
1226

1227
  """
1228
  try:
1229
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1230
                                           utils.ReadFile(filename))
1231
  except Exception, err: # pylint: disable-msg=W0703
1232
    return (LUClusterVerify.ETYPE_ERROR,
1233
            "Failed to load X509 certificate %s: %s" % (filename, err))
1234

    
1235
  (errcode, msg) = \
1236
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1237
                                constants.SSL_CERT_EXPIRATION_ERROR)
1238

    
1239
  if msg:
1240
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1241
  else:
1242
    fnamemsg = None
1243

    
1244
  if errcode is None:
1245
    return (None, fnamemsg)
1246
  elif errcode == utils.CERT_WARNING:
1247
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1248
  elif errcode == utils.CERT_ERROR:
1249
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1250

    
1251
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1252

    
1253

    
1254
class LUClusterVerify(LogicalUnit):
1255
  """Verifies the cluster status.
1256

1257
  """
1258
  HPATH = "cluster-verify"
1259
  HTYPE = constants.HTYPE_CLUSTER
1260
  REQ_BGL = False
1261

    
1262
  TCLUSTER = "cluster"
1263
  TNODE = "node"
1264
  TINSTANCE = "instance"
1265

    
1266
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1267
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1268
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1269
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1270
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1271
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1272
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1273
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1274
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1275
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1276
  ENODEDRBD = (TNODE, "ENODEDRBD")
1277
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1278
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1279
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1280
  ENODEHV = (TNODE, "ENODEHV")
1281
  ENODELVM = (TNODE, "ENODELVM")
1282
  ENODEN1 = (TNODE, "ENODEN1")
1283
  ENODENET = (TNODE, "ENODENET")
1284
  ENODEOS = (TNODE, "ENODEOS")
1285
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1286
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1287
  ENODERPC = (TNODE, "ENODERPC")
1288
  ENODESSH = (TNODE, "ENODESSH")
1289
  ENODEVERSION = (TNODE, "ENODEVERSION")
1290
  ENODESETUP = (TNODE, "ENODESETUP")
1291
  ENODETIME = (TNODE, "ENODETIME")
1292
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1293

    
1294
  ETYPE_FIELD = "code"
1295
  ETYPE_ERROR = "ERROR"
1296
  ETYPE_WARNING = "WARNING"
1297

    
1298
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1299

    
1300
  class NodeImage(object):
1301
    """A class representing the logical and physical status of a node.
1302

1303
    @type name: string
1304
    @ivar name: the node name to which this object refers
1305
    @ivar volumes: a structure as returned from
1306
        L{ganeti.backend.GetVolumeList} (runtime)
1307
    @ivar instances: a list of running instances (runtime)
1308
    @ivar pinst: list of configured primary instances (config)
1309
    @ivar sinst: list of configured secondary instances (config)
1310
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1311
        instances for which this node is secondary (config)
1312
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1313
    @ivar dfree: free disk, as reported by the node (runtime)
1314
    @ivar offline: the offline status (config)
1315
    @type rpc_fail: boolean
1316
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1317
        not whether the individual keys were correct) (runtime)
1318
    @type lvm_fail: boolean
1319
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1320
    @type hyp_fail: boolean
1321
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1322
    @type ghost: boolean
1323
    @ivar ghost: whether this is a known node or not (config)
1324
    @type os_fail: boolean
1325
    @ivar os_fail: whether the RPC call didn't return valid OS data
1326
    @type oslist: list
1327
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1328
    @type vm_capable: boolean
1329
    @ivar vm_capable: whether the node can host instances
1330

1331
    """
1332
    def __init__(self, offline=False, name=None, vm_capable=True):
1333
      self.name = name
1334
      self.volumes = {}
1335
      self.instances = []
1336
      self.pinst = []
1337
      self.sinst = []
1338
      self.sbp = {}
1339
      self.mfree = 0
1340
      self.dfree = 0
1341
      self.offline = offline
1342
      self.vm_capable = vm_capable
1343
      self.rpc_fail = False
1344
      self.lvm_fail = False
1345
      self.hyp_fail = False
1346
      self.ghost = False
1347
      self.os_fail = False
1348
      self.oslist = {}
1349

    
1350
  def ExpandNames(self):
1351
    self.needed_locks = {
1352
      locking.LEVEL_NODE: locking.ALL_SET,
1353
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1354
    }
1355
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1356

    
1357
  def _Error(self, ecode, item, msg, *args, **kwargs):
1358
    """Format an error message.
1359

1360
    Based on the opcode's error_codes parameter, either format a
1361
    parseable error code, or a simpler error string.
1362

1363
    This must be called only from Exec and functions called from Exec.
1364

1365
    """
1366
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1367
    itype, etxt = ecode
1368
    # first complete the msg
1369
    if args:
1370
      msg = msg % args
1371
    # then format the whole message
1372
    if self.op.error_codes:
1373
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1374
    else:
1375
      if item:
1376
        item = " " + item
1377
      else:
1378
        item = ""
1379
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1380
    # and finally report it via the feedback_fn
1381
    self._feedback_fn("  - %s" % msg)
1382

    
1383
  def _ErrorIf(self, cond, *args, **kwargs):
1384
    """Log an error message if the passed condition is True.
1385

1386
    """
1387
    cond = bool(cond) or self.op.debug_simulate_errors
1388
    if cond:
1389
      self._Error(*args, **kwargs)
1390
    # do not mark the operation as failed for WARN cases only
1391
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1392
      self.bad = self.bad or cond
1393

    
1394
  def _VerifyNode(self, ninfo, nresult):
1395
    """Perform some basic validation on data returned from a node.
1396

1397
      - check the result data structure is well formed and has all the
1398
        mandatory fields
1399
      - check ganeti version
1400

1401
    @type ninfo: L{objects.Node}
1402
    @param ninfo: the node to check
1403
    @param nresult: the results from the node
1404
    @rtype: boolean
1405
    @return: whether overall this call was successful (and we can expect
1406
         reasonable values in the response)
1407

1408
    """
1409
    node = ninfo.name
1410
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1411

    
1412
    # main result, nresult should be a non-empty dict
1413
    test = not nresult or not isinstance(nresult, dict)
1414
    _ErrorIf(test, self.ENODERPC, node,
1415
                  "unable to verify node: no data returned")
1416
    if test:
1417
      return False
1418

    
1419
    # compares ganeti version
1420
    local_version = constants.PROTOCOL_VERSION
1421
    remote_version = nresult.get("version", None)
1422
    test = not (remote_version and
1423
                isinstance(remote_version, (list, tuple)) and
1424
                len(remote_version) == 2)
1425
    _ErrorIf(test, self.ENODERPC, node,
1426
             "connection to node returned invalid data")
1427
    if test:
1428
      return False
1429

    
1430
    test = local_version != remote_version[0]
1431
    _ErrorIf(test, self.ENODEVERSION, node,
1432
             "incompatible protocol versions: master %s,"
1433
             " node %s", local_version, remote_version[0])
1434
    if test:
1435
      return False
1436

    
1437
    # node seems compatible, we can actually try to look into its results
1438

    
1439
    # full package version
1440
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1441
                  self.ENODEVERSION, node,
1442
                  "software version mismatch: master %s, node %s",
1443
                  constants.RELEASE_VERSION, remote_version[1],
1444
                  code=self.ETYPE_WARNING)
1445

    
1446
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1447
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1448
      for hv_name, hv_result in hyp_result.iteritems():
1449
        test = hv_result is not None
1450
        _ErrorIf(test, self.ENODEHV, node,
1451
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1452

    
1453
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1454
    if ninfo.vm_capable and isinstance(hvp_result, list):
1455
      for item, hv_name, hv_result in hvp_result:
1456
        _ErrorIf(True, self.ENODEHV, node,
1457
                 "hypervisor %s parameter verify failure (source %s): %s",
1458
                 hv_name, item, hv_result)
1459

    
1460
    test = nresult.get(constants.NV_NODESETUP,
1461
                           ["Missing NODESETUP results"])
1462
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1463
             "; ".join(test))
1464

    
1465
    return True
1466

    
1467
  def _VerifyNodeTime(self, ninfo, nresult,
1468
                      nvinfo_starttime, nvinfo_endtime):
1469
    """Check the node time.
1470

1471
    @type ninfo: L{objects.Node}
1472
    @param ninfo: the node to check
1473
    @param nresult: the remote results for the node
1474
    @param nvinfo_starttime: the start time of the RPC call
1475
    @param nvinfo_endtime: the end time of the RPC call
1476

1477
    """
1478
    node = ninfo.name
1479
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1480

    
1481
    ntime = nresult.get(constants.NV_TIME, None)
1482
    try:
1483
      ntime_merged = utils.MergeTime(ntime)
1484
    except (ValueError, TypeError):
1485
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1486
      return
1487

    
1488
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1489
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1490
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1491
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1492
    else:
1493
      ntime_diff = None
1494

    
1495
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1496
             "Node time diverges by at least %s from master node time",
1497
             ntime_diff)
1498

    
1499
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1500
    """Check the node time.
1501

1502
    @type ninfo: L{objects.Node}
1503
    @param ninfo: the node to check
1504
    @param nresult: the remote results for the node
1505
    @param vg_name: the configured VG name
1506

1507
    """
1508
    if vg_name is None:
1509
      return
1510

    
1511
    node = ninfo.name
1512
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1513

    
1514
    # checks vg existence and size > 20G
1515
    vglist = nresult.get(constants.NV_VGLIST, None)
1516
    test = not vglist
1517
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1518
    if not test:
1519
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1520
                                            constants.MIN_VG_SIZE)
1521
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1522

    
1523
    # check pv names
1524
    pvlist = nresult.get(constants.NV_PVLIST, None)
1525
    test = pvlist is None
1526
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1527
    if not test:
1528
      # check that ':' is not present in PV names, since it's a
1529
      # special character for lvcreate (denotes the range of PEs to
1530
      # use on the PV)
1531
      for _, pvname, owner_vg in pvlist:
1532
        test = ":" in pvname
1533
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1534
                 " '%s' of VG '%s'", pvname, owner_vg)
1535

    
1536
  def _VerifyNodeNetwork(self, ninfo, nresult):
1537
    """Check the node time.
1538

1539
    @type ninfo: L{objects.Node}
1540
    @param ninfo: the node to check
1541
    @param nresult: the remote results for the node
1542

1543
    """
1544
    node = ninfo.name
1545
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1546

    
1547
    test = constants.NV_NODELIST not in nresult
1548
    _ErrorIf(test, self.ENODESSH, node,
1549
             "node hasn't returned node ssh connectivity data")
1550
    if not test:
1551
      if nresult[constants.NV_NODELIST]:
1552
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1553
          _ErrorIf(True, self.ENODESSH, node,
1554
                   "ssh communication with node '%s': %s", a_node, a_msg)
1555

    
1556
    test = constants.NV_NODENETTEST not in nresult
1557
    _ErrorIf(test, self.ENODENET, node,
1558
             "node hasn't returned node tcp connectivity data")
1559
    if not test:
1560
      if nresult[constants.NV_NODENETTEST]:
1561
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1562
        for anode in nlist:
1563
          _ErrorIf(True, self.ENODENET, node,
1564
                   "tcp communication with node '%s': %s",
1565
                   anode, nresult[constants.NV_NODENETTEST][anode])
1566

    
1567
    test = constants.NV_MASTERIP not in nresult
1568
    _ErrorIf(test, self.ENODENET, node,
1569
             "node hasn't returned node master IP reachability data")
1570
    if not test:
1571
      if not nresult[constants.NV_MASTERIP]:
1572
        if node == self.master_node:
1573
          msg = "the master node cannot reach the master IP (not configured?)"
1574
        else:
1575
          msg = "cannot reach the master IP"
1576
        _ErrorIf(True, self.ENODENET, node, msg)
1577

    
1578
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1579
                      diskstatus):
1580
    """Verify an instance.
1581

1582
    This function checks to see if the required block devices are
1583
    available on the instance's node.
1584

1585
    """
1586
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1587
    node_current = instanceconfig.primary_node
1588

    
1589
    node_vol_should = {}
1590
    instanceconfig.MapLVsByNode(node_vol_should)
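    # After MapLVsByNode, node_vol_should maps each node name to the logical
    # volumes the configuration expects there, roughly (names made up):
    #   {"node1.example.com": ["xenvg/disk0_data", "xenvg/disk0_meta"]}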
1591

    
1592
    for node in node_vol_should:
1593
      n_img = node_image[node]
1594
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1595
        # ignore missing volumes on offline or broken nodes
1596
        continue
1597
      for volume in node_vol_should[node]:
1598
        test = volume not in n_img.volumes
1599
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1600
                 "volume %s missing on node %s", volume, node)
1601

    
1602
    if instanceconfig.admin_up:
1603
      pri_img = node_image[node_current]
1604
      test = instance not in pri_img.instances and not pri_img.offline
1605
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1606
               "instance not running on its primary node %s",
1607
               node_current)
1608

    
1609
    for node, n_img in node_image.items():
1610
      if node != node_current:
1611
        test = instance in n_img.instances
1612
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1613
                 "instance should not run on node %s", node)
1614

    
1615
    diskdata = [(nname, success, status, idx)
1616
                for (nname, disks) in diskstatus.items()
1617
                for idx, (success, status) in enumerate(disks)]
1618

    
1619
    for nname, success, bdev_status, idx in diskdata:
1620
      # the 'ghost node' construction in Exec() ensures that we have a
1621
      # node here
1622
      snode = node_image[nname]
1623
      bad_snode = snode.ghost or snode.offline
1624
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1625
               self.EINSTANCEFAULTYDISK, instance,
1626
               "couldn't retrieve status for disk/%s on %s: %s",
1627
               idx, nname, bdev_status)
1628
      _ErrorIf((instanceconfig.admin_up and success and
1629
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1630
               self.EINSTANCEFAULTYDISK, instance,
1631
               "disk/%s on %s is faulty", idx, nname)
1632

    
1633
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1634
    """Verify if there are any unknown volumes in the cluster.
1635

1636
    The .os, .swap and backup volumes are ignored. All other volumes are
1637
    reported as unknown.
1638

1639
    @type reserved: L{ganeti.utils.FieldSet}
1640
    @param reserved: a FieldSet of reserved volume names
1641

1642
    """
1643
    for node, n_img in node_image.items():
1644
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1645
        # skip non-healthy nodes
1646
        continue
1647
      for volume in n_img.volumes:
1648
        test = ((node not in node_vol_should or
1649
                volume not in node_vol_should[node]) and
1650
                not reserved.Matches(volume))
1651
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1652
                      "volume %s is unknown", volume)
1653

    
1654
  def _VerifyOrphanInstances(self, instancelist, node_image):
1655
    """Verify the list of running instances.
1656

1657
    This checks what instances are running but unknown to the cluster.
1658

1659
    """
1660
    for node, n_img in node_image.items():
1661
      for o_inst in n_img.instances:
1662
        test = o_inst not in instancelist
1663
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1664
                      "instance %s on node %s should not exist", o_inst, node)
1665

    
1666
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1667
    """Verify N+1 Memory Resilience.
1668

1669
    Check that if one single node dies we can still start all the
1670
    instances it was primary for.
1671

1672
    """
1673
    cluster_info = self.cfg.GetClusterInfo()
1674
    for node, n_img in node_image.items():
1675
      # This code checks that every node which is now listed as
1676
      # secondary has enough memory to host all instances it is
1677
      # supposed to, should a single other node in the cluster fail.
1678
      # FIXME: not ready for failover to an arbitrary node
1679
      # FIXME: does not support file-backed instances
1680
      # WARNING: we currently take into account down instances as well
1681
      # as up ones, considering that even if they're down someone
1682
      # might want to start them even in the event of a node failure.
1683
      if n_img.offline:
1684
        # we're skipping offline nodes from the N+1 warning, since
1685
        # most likely we don't have good memory information from them;
1686
        # we already list instances living on such nodes, and that's
1687
        # enough warning
1688
        continue
1689
      for prinode, instances in n_img.sbp.items():
1690
        needed_mem = 0
1691
        for instance in instances:
1692
          bep = cluster_info.FillBE(instance_cfg[instance])
1693
          if bep[constants.BE_AUTO_BALANCE]:
1694
            needed_mem += bep[constants.BE_MEMORY]
1695
        test = n_img.mfree < needed_mem
1696
        self._ErrorIf(test, self.ENODEN1, node,
1697
                      "not enough memory to accomodate instance failovers"
1698
                      " should node %s fail (%dMiB needed, %dMiB available)",
1699
                      prinode, needed_mem, n_img.mfree)
1700

    
1701
  @classmethod
1702
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1703
                   (files_all, files_all_opt, files_mc, files_vm)):
1704
    """Verifies file checksums collected from all nodes.
1705

1706
    @param errorif: Callback for reporting errors
1707
    @param nodeinfo: List of L{objects.Node} objects
1708
    @param master_node: Name of master node
1709
    @param all_nvinfo: RPC results
1710

1711
    """
1712
    node_names = frozenset(node.name for node in nodeinfo)
1713

    
1714
    assert master_node in node_names
1715
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1716
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1717
           "Found file listed in more than one file list"
1718

    
1719
    # Define functions determining which nodes to consider for a file
1720
    file2nodefn = dict([(filename, fn)
1721
      for (files, fn) in [(files_all, None),
1722
                          (files_all_opt, None),
1723
                          (files_mc, lambda node: (node.master_candidate or
1724
                                                   node.name == master_node)),
1725
                          (files_vm, lambda node: node.vm_capable)]
1726
      for filename in files])
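    # Illustrative result (paths made up): file2nodefn maps every checked
    # file to a predicate selecting the nodes that should have it, or None
    # meaning "all nodes", e.g.
    #   {"/path/to/cluster-wide.file": None,
    #    "/path/to/candidate-only.file": <master-candidate predicate>,
    #    "/path/to/hypervisor.conf": <vm_capable predicate>}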
1727

    
1728
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1729

    
1730
    for node in nodeinfo:
1731
      nresult = all_nvinfo[node.name]
1732

    
1733
      if nresult.fail_msg or not nresult.payload:
1734
        node_files = None
1735
      else:
1736
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
1737

    
1738
      test = not (node_files and isinstance(node_files, dict))
1739
      errorif(test, cls.ENODEFILECHECK, node.name,
1740
              "Node did not return file checksum data")
1741
      if test:
1742
        continue
1743

    
1744
      for (filename, checksum) in node_files.items():
1745
        # Check if the file should be considered for a node
1746
        fn = file2nodefn[filename]
1747
        if fn is None or fn(node):
1748
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
1749

    
1750
    for (filename, checksums) in fileinfo.items():
1751
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1752

    
1753
      # Nodes having the file
1754
      with_file = frozenset(node_name
1755
                            for nodes in fileinfo[filename].values()
1756
                            for node_name in nodes)
1757

    
1758
      # Nodes missing file
1759
      missing_file = node_names - with_file
1760

    
1761
      if filename in files_all_opt:
1762
        # All or no nodes
1763
        errorif(missing_file and missing_file != node_names,
1764
                cls.ECLUSTERFILECHECK, None,
1765
                "File %s is optional, but it must exist on all or no nodes (not"
1766
                " found on %s)",
1767
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1768
      else:
1769
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1770
                "File %s is missing from node(s) %s", filename,
1771
                utils.CommaJoin(utils.NiceSort(missing_file)))
1772

    
1773
      # See if there are multiple versions of the file
1774
      test = len(checksums) > 1
1775
      if test:
1776
        variants = ["variant %s on %s" %
1777
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1778
                    for (idx, (checksum, nodes)) in
1779
                      enumerate(sorted(checksums.items()))]
1780
      else:
1781
        variants = []
1782

    
1783
      errorif(test, cls.ECLUSTERFILECHECK, None,
1784
              "File %s found with %s different checksums (%s)",
1785
              filename, len(checksums), "; ".join(variants))
1786

    
1787
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1788
                      drbd_map):
1789
    """Verifies and the node DRBD status.
1790

1791
    @type ninfo: L{objects.Node}
1792
    @param ninfo: the node to check
1793
    @param nresult: the remote results for the node
1794
    @param instanceinfo: the dict of instances
1795
    @param drbd_helper: the configured DRBD usermode helper
1796
    @param drbd_map: the DRBD map as returned by
1797
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1798

1799
    """
1800
    node = ninfo.name
1801
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1802

    
1803
    if drbd_helper:
1804
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1805
      test = (helper_result is None)
1806
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1807
               "no drbd usermode helper returned")
1808
      if helper_result:
1809
        status, payload = helper_result
1810
        test = not status
1811
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1812
                 "drbd usermode helper check unsuccessful: %s", payload)
1813
        test = status and (payload != drbd_helper)
1814
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1815
                 "wrong drbd usermode helper: %s", payload)
1816

    
1817
    # compute the DRBD minors
1818
    node_drbd = {}
1819
    for minor, instance in drbd_map[node].items():
1820
      test = instance not in instanceinfo
1821
      _ErrorIf(test, self.ECLUSTERCFG, None,
1822
               "ghost instance '%s' in temporary DRBD map", instance)
1823
        # ghost instance should not be running, but otherwise we
1824
        # don't give double warnings (both ghost instance and
1825
        # unallocated minor in use)
1826
      if test:
1827
        node_drbd[minor] = (instance, False)
1828
      else:
1829
        instance = instanceinfo[instance]
1830
        node_drbd[minor] = (instance.name, instance.admin_up)
1831

    
1832
    # and now check them
1833
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1834
    test = not isinstance(used_minors, (tuple, list))
1835
    _ErrorIf(test, self.ENODEDRBD, node,
1836
             "cannot parse drbd status file: %s", str(used_minors))
1837
    if test:
1838
      # we cannot check drbd status
1839
      return
1840

    
1841
    for minor, (iname, must_exist) in node_drbd.items():
1842
      test = minor not in used_minors and must_exist
1843
      _ErrorIf(test, self.ENODEDRBD, node,
1844
               "drbd minor %d of instance %s is not active", minor, iname)
1845
    for minor in used_minors:
1846
      test = minor not in node_drbd
1847
      _ErrorIf(test, self.ENODEDRBD, node,
1848
               "unallocated drbd minor %d is in use", minor)
1849

    
1850
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1851
    """Builds the node OS structures.
1852

1853
    @type ninfo: L{objects.Node}
1854
    @param ninfo: the node to check
1855
    @param nresult: the remote results for the node
1856
    @param nimg: the node image object
1857

1858
    """
1859
    node = ninfo.name
1860
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1861

    
1862
    remote_os = nresult.get(constants.NV_OSLIST, None)
1863
    test = (not isinstance(remote_os, list) or
1864
            not compat.all(isinstance(v, list) and len(v) == 7
1865
                           for v in remote_os))
1866

    
1867
    _ErrorIf(test, self.ENODEOS, node,
1868
             "node hasn't returned valid OS data")
1869

    
1870
    nimg.os_fail = test
1871

    
1872
    if test:
1873
      return
1874

    
1875
    os_dict = {}
1876

    
1877
    for (name, os_path, status, diagnose,
1878
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1879

    
1880
      if name not in os_dict:
1881
        os_dict[name] = []
1882

    
1883
      # parameters is a list of lists instead of list of tuples due to
1884
      # JSON lacking a real tuple type, fix it:
1885
      parameters = [tuple(v) for v in parameters]
1886
      os_dict[name].append((os_path, status, diagnose,
1887
                            set(variants), set(parameters), set(api_ver)))
1888

    
1889
    nimg.oslist = os_dict
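    # The resulting structure is roughly (values made up):
    #   nimg.oslist = {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
    #                                   set(["default"]), set(), set([20]))]}
    # with one tuple per location where the node reported that OS; duplicate
    # entries for the same name are flagged later in _VerifyNodeOS.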
1890

    
1891
  def _VerifyNodeOS(self, ninfo, nimg, base):
1892
    """Verifies the node OS list.
1893

1894
    @type ninfo: L{objects.Node}
1895
    @param ninfo: the node to check
1896
    @param nimg: the node image object
1897
    @param base: the 'template' node we match against (e.g. from the master)
1898

1899
    """
1900
    node = ninfo.name
1901
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1902

    
1903
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1904

    
1905
    for os_name, os_data in nimg.oslist.items():
1906
      assert os_data, "Empty OS status for OS %s?!" % os_name
1907
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1908
      _ErrorIf(not f_status, self.ENODEOS, node,
1909
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1910
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1911
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1912
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1913
      # this will be caught in the backend too
1914
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1915
               and not f_var, self.ENODEOS, node,
1916
               "OS %s with API at least %d does not declare any variant",
1917
               os_name, constants.OS_API_V15)
1918
      # comparisons with the 'base' image
1919
      test = os_name not in base.oslist
1920
      _ErrorIf(test, self.ENODEOS, node,
1921
               "Extra OS %s not present on reference node (%s)",
1922
               os_name, base.name)
1923
      if test:
1924
        continue
1925
      assert base.oslist[os_name], "Base node has empty OS status?"
1926
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1927
      if not b_status:
1928
        # base OS is invalid, skipping
1929
        continue
1930
      for kind, a, b in [("API version", f_api, b_api),
1931
                         ("variants list", f_var, b_var),
1932
                         ("parameters", f_param, b_param)]:
1933
        _ErrorIf(a != b, self.ENODEOS, node,
1934
                 "OS %s %s differs from reference node %s: %s vs. %s",
1935
                 kind, os_name, base.name,
1936
                 utils.CommaJoin(a), utils.CommaJoin(b))
1937

    
1938
    # check any missing OSes
1939
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1940
    _ErrorIf(missing, self.ENODEOS, node,
1941
             "OSes present on reference node %s but missing on this node: %s",
1942
             base.name, utils.CommaJoin(missing))
1943

    
1944
  def _VerifyOob(self, ninfo, nresult):
1945
    """Verifies out of band functionality of a node.
1946

1947
    @type ninfo: L{objects.Node}
1948
    @param ninfo: the node to check
1949
    @param nresult: the remote results for the node
1950

1951
    """
1952
    node = ninfo.name
1953
    # We just have to verify the paths on master and/or master candidates
1954
    # as the oob helper is invoked on the master
1955
    if ((ninfo.master_candidate or ninfo.master_capable) and
1956
        constants.NV_OOB_PATHS in nresult):
1957
      for path_result in nresult[constants.NV_OOB_PATHS]:
1958
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1959

    
1960
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1961
    """Verifies and updates the node volume data.
1962

1963
    This function will update a L{NodeImage}'s internal structures
1964
    with data from the remote call.
1965

1966
    @type ninfo: L{objects.Node}
1967
    @param ninfo: the node to check
1968
    @param nresult: the remote results for the node
1969
    @param nimg: the node image object
1970
    @param vg_name: the configured VG name
1971

1972
    """
1973
    node = ninfo.name
1974
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1975

    
1976
    nimg.lvm_fail = True
1977
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1978
    if vg_name is None:
1979
      pass
1980
    elif isinstance(lvdata, basestring):
1981
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1982
               utils.SafeEncode(lvdata))
1983
    elif not isinstance(lvdata, dict):
1984
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1985
    else:
1986
      nimg.volumes = lvdata
1987
      nimg.lvm_fail = False
1988

    
1989
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1990
    """Verifies and updates the node instance list.
1991

1992
    If the listing was successful, then updates this node's instance
1993
    list. Otherwise, it marks the RPC call as failed for the instance
1994
    list key.
1995

1996
    @type ninfo: L{objects.Node}
1997
    @param ninfo: the node to check
1998
    @param nresult: the remote results for the node
1999
    @param nimg: the node image object
2000

2001
    """
2002
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2003
    test = not isinstance(idata, list)
2004
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2005
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2006
    if test:
2007
      nimg.hyp_fail = True
2008
    else:
2009
      nimg.instances = idata
2010

    
2011
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2012
    """Verifies and computes a node information map
2013

2014
    @type ninfo: L{objects.Node}
2015
    @param ninfo: the node to check
2016
    @param nresult: the remote results for the node
2017
    @param nimg: the node image object
2018
    @param vg_name: the configured VG name
2019

2020
    """
2021
    node = ninfo.name
2022
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2023

    
2024
    # try to read free memory (from the hypervisor)
2025
    hv_info = nresult.get(constants.NV_HVINFO, None)
2026
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2027
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2028
    if not test:
2029
      try:
2030
        nimg.mfree = int(hv_info["memory_free"])
2031
      except (ValueError, TypeError):
2032
        _ErrorIf(True, self.ENODERPC, node,
2033
                 "node returned invalid nodeinfo, check hypervisor")
2034

    
2035
    # FIXME: devise a free space model for file based instances as well
2036
    if vg_name is not None:
2037
      test = (constants.NV_VGLIST not in nresult or
2038
              vg_name not in nresult[constants.NV_VGLIST])
2039
      _ErrorIf(test, self.ENODELVM, node,
2040
               "node didn't return data for the volume group '%s'"
2041
               " - it is either missing or broken", vg_name)
2042
      if not test:
2043
        try:
2044
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2045
        except (ValueError, TypeError):
2046
          _ErrorIf(True, self.ENODERPC, node,
2047
                   "node returned invalid LVM info, check LVM status")
2048

    
2049
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2050
    """Gets per-disk status information for all instances.
2051

2052
    @type nodelist: list of strings
2053
    @param nodelist: Node names
2054
    @type node_image: dict of (name, L{objects.Node})
2055
    @param node_image: Node objects
2056
    @type instanceinfo: dict of (name, L{objects.Instance})
2057
    @param instanceinfo: Instance objects
2058
    @rtype: {instance: {node: [(success, payload)]}}
2059
    @return: a dictionary of per-instance dictionaries with nodes as
2060
        keys and disk information as values; the disk information is a
2061
        list of tuples (success, payload)
2062

2063
    """
2064
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2065

    
2066
    node_disks = {}
2067
    node_disks_devonly = {}
2068
    diskless_instances = set()
2069
    diskless = constants.DT_DISKLESS
2070

    
2071
    for nname in nodelist:
2072
      node_instances = list(itertools.chain(node_image[nname].pinst,
2073
                                            node_image[nname].sinst))
2074
      diskless_instances.update(inst for inst in node_instances
2075
                                if instanceinfo[inst].disk_template == diskless)
2076
      disks = [(inst, disk)
2077
               for inst in node_instances
2078
               for disk in instanceinfo[inst].disks]
2079

    
2080
      if not disks:
2081
        # No need to collect data
2082
        continue
2083

    
2084
      node_disks[nname] = disks
2085

    
2086
      # Creating copies as SetDiskID below will modify the objects and that can
2087
      # lead to incorrect data returned from nodes
2088
      devonly = [dev.Copy() for (_, dev) in disks]
2089

    
2090
      for dev in devonly:
2091
        self.cfg.SetDiskID(dev, nname)
2092

    
2093
      node_disks_devonly[nname] = devonly
2094

    
2095
    assert len(node_disks) == len(node_disks_devonly)
2096

    
2097
    # Collect data from all nodes with disks
2098
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2099
                                                          node_disks_devonly)
2100

    
2101
    assert len(result) == len(node_disks)
2102

    
2103
    instdisk = {}
2104

    
2105
    for (nname, nres) in result.items():
2106
      disks = node_disks[nname]
2107

    
2108
      if nres.offline:
2109
        # No data from this node
2110
        data = len(disks) * [(False, "node offline")]
2111
      else:
2112
        msg = nres.fail_msg
2113
        _ErrorIf(msg, self.ENODERPC, nname,
2114
                 "while getting disk information: %s", msg)
2115
        if msg:
2116
          # No data from this node
2117
          data = len(disks) * [(False, msg)]
2118
        else:
2119
          data = []
2120
          for idx, i in enumerate(nres.payload):
2121
            if isinstance(i, (tuple, list)) and len(i) == 2:
2122
              data.append(i)
2123
            else:
2124
              logging.warning("Invalid result from node %s, entry %d: %s",
2125
                              nname, idx, i)
2126
              data.append((False, "Invalid result from the remote node"))
2127

    
2128
      for ((inst, _), status) in zip(disks, data):
2129
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2130

    
2131
    # Add empty entries for diskless instances.
2132
    for inst in diskless_instances:
2133
      assert inst not in instdisk
2134
      instdisk[inst] = {}
2135

    
2136
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2137
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2138
                      compat.all(isinstance(s, (tuple, list)) and
2139
                                 len(s) == 2 for s in statuses)
2140
                      for inst, nnames in instdisk.items()
2141
                      for nname, statuses in nnames.items())
2142
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2143

    
2144
    return instdisk
2145

    
2146
  def _VerifyHVP(self, hvp_data):
2147
    """Verifies locally the syntax of the hypervisor parameters.
2148

2149
    """
2150
    for item, hv_name, hv_params in hvp_data:
2151
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2152
             (hv_name, item))
2153
      try:
2154
        hv_class = hypervisor.GetHypervisor(hv_name)
2155
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2156
        hv_class.CheckParameterSyntax(hv_params)
2157
      except errors.GenericError, err:
2158
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2159

    
2160
  def BuildHooksEnv(self):
2161
    """Build hooks env.
2162

2163
    Cluster-Verify hooks just ran in the post phase and their failure makes
2164
    the output be logged in the verify output and the verification to fail.
2165

2166
    """
2167
    cfg = self.cfg
2168

    
2169
    env = {
2170
      "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2171
      }
2172

    
2173
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2174
               for node in cfg.GetAllNodesInfo().values())
2175

    
2176
    return env
2177

    
2178
  def BuildHooksNodes(self):
2179
    """Build hooks nodes.
2180

2181
    """
2182
    return ([], self.cfg.GetNodeList())
2183

    
2184
  def Exec(self, feedback_fn):
2185
    """Verify integrity of cluster, performing various test on nodes.
2186

2187
    """
2188
    # This method has too many local variables. pylint: disable-msg=R0914
2189
    self.bad = False
2190
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2191
    verbose = self.op.verbose
2192
    self._feedback_fn = feedback_fn
2193
    feedback_fn("* Verifying global settings")
2194
    for msg in self.cfg.VerifyConfig():
2195
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2196

    
2197
    # Check the cluster certificates
2198
    for cert_filename in constants.ALL_CERT_FILES:
2199
      (errcode, msg) = _VerifyCertificate(cert_filename)
2200
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2201

    
2202
    vg_name = self.cfg.GetVGName()
2203
    drbd_helper = self.cfg.GetDRBDHelper()
2204
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2205
    cluster = self.cfg.GetClusterInfo()
2206
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2207
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2208
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2209
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2210
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2211
                        for iname in instancelist)
2212
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2213
    i_non_redundant = [] # Non redundant instances
2214
    i_non_a_balanced = [] # Non auto-balanced instances
2215
    n_offline = 0 # Count of offline nodes
2216
    n_drained = 0 # Count of nodes being drained
2217
    node_vol_should = {}
2218

    
2219
    # FIXME: verify OS list
2220

    
2221
    # File verification
2222
    filemap = _ComputeAncillaryFiles(cluster, False)
2223

    
2224
    # do local checksums
2225
    master_node = self.master_node = self.cfg.GetMasterNode()
2226
    master_ip = self.cfg.GetMasterIP()
2227

    
2228
    # Compute the set of hypervisor parameters
2229
    hvp_data = []
2230
    for hv_name in hypervisors:
2231
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2232
    for os_name, os_hvp in cluster.os_hvp.items():
2233
      for hv_name, hv_params in os_hvp.items():
2234
        if not hv_params:
2235
          continue
2236
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2237
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
2238
    # TODO: collapse identical parameter values in a single one
2239
    for instance in instanceinfo.values():
2240
      if not instance.hvparams:
2241
        continue
2242
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2243
                       cluster.FillHV(instance)))
2244
    # and verify them locally
2245
    self._VerifyHVP(hvp_data)
2246

    
2247
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2248
    node_verify_param = {
2249
      constants.NV_FILELIST:
2250
        utils.UniqueSequence(filename
2251
                             for files in filemap
2252
                             for filename in files),
2253
      constants.NV_NODELIST: [node.name for node in nodeinfo
2254
                              if not node.offline],
2255
      constants.NV_HYPERVISOR: hypervisors,
2256
      constants.NV_HVPARAMS: hvp_data,
2257
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2258
                                  node.secondary_ip) for node in nodeinfo
2259
                                 if not node.offline],
2260
      constants.NV_INSTANCELIST: hypervisors,
2261
      constants.NV_VERSION: None,
2262
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2263
      constants.NV_NODESETUP: None,
2264
      constants.NV_TIME: None,
2265
      constants.NV_MASTERIP: (master_node, master_ip),
2266
      constants.NV_OSLIST: None,
2267
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2268
      }
2269

    
2270
    if vg_name is not None:
2271
      node_verify_param[constants.NV_VGLIST] = None
2272
      node_verify_param[constants.NV_LVLIST] = vg_name
2273
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2274
      node_verify_param[constants.NV_DRBDLIST] = None
2275

    
2276
    if drbd_helper:
2277
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2278

    
2279
    # Build our expected cluster state
2280
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2281
                                                 name=node.name,
2282
                                                 vm_capable=node.vm_capable))
2283
                      for node in nodeinfo)
2284

    
2285
    # Gather OOB paths
2286
    oob_paths = []
2287
    for node in nodeinfo:
2288
      path = _SupportsOob(self.cfg, node)
2289
      if path and path not in oob_paths:
2290
        oob_paths.append(path)
2291

    
2292
    if oob_paths:
2293
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2294

    
2295
    for instance in instancelist:
2296
      inst_config = instanceinfo[instance]
2297

    
2298
      for nname in inst_config.all_nodes:
2299
        if nname not in node_image:
2300
          # ghost node
2301
          gnode = self.NodeImage(name=nname)
2302
          gnode.ghost = True
2303
          node_image[nname] = gnode
2304

    
2305
      inst_config.MapLVsByNode(node_vol_should)
2306

    
2307
      pnode = inst_config.primary_node
2308
      node_image[pnode].pinst.append(instance)
2309

    
2310
      for snode in inst_config.secondary_nodes:
2311
        nimg = node_image[snode]
2312
        nimg.sinst.append(instance)
2313
        if pnode not in nimg.sbp:
2314
          nimg.sbp[pnode] = []
2315
        nimg.sbp[pnode].append(instance)
2316

    
2317
    # At this point, we have the in-memory data structures complete,
2318
    # except for the runtime information, which we'll gather next
2319

    
2320
    # Due to the way our RPC system works, exact response times cannot be
2321
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2322
    # time before and after executing the request, we can at least have a time
2323
    # window.
2324
    nvinfo_starttime = time.time()
2325
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2326
                                           self.cfg.GetClusterName())
2327
    nvinfo_endtime = time.time()
2328

    
2329
    all_drbd_map = self.cfg.ComputeDRBDMap()
2330

    
2331
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2332
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2333

    
2334
    feedback_fn("* Verifying configuration file consistency")
2335
    self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2336

    
2337
    feedback_fn("* Verifying node status")
2338

    
2339
    refos_img = None
2340

    
2341
    for node_i in nodeinfo:
2342
      node = node_i.name
2343
      nimg = node_image[node]
2344

    
2345
      if node_i.offline:
2346
        if verbose:
2347
          feedback_fn("* Skipping offline node %s" % (node,))
2348
        n_offline += 1
2349
        continue
2350

    
2351
      if node == master_node:
2352
        ntype = "master"
2353
      elif node_i.master_candidate:
2354
        ntype = "master candidate"
2355
      elif node_i.drained:
2356
        ntype = "drained"
2357
        n_drained += 1
2358
      else:
2359
        ntype = "regular"
2360
      if verbose:
2361
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2362

    
2363
      msg = all_nvinfo[node].fail_msg
2364
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2365
      if msg:
2366
        nimg.rpc_fail = True
2367
        continue
2368

    
2369
      nresult = all_nvinfo[node].payload
2370

    
2371
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2372
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2373
      self._VerifyNodeNetwork(node_i, nresult)
2374
      self._VerifyOob(node_i, nresult)
2375

    
2376
      if nimg.vm_capable:
2377
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2378
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2379
                             all_drbd_map)
2380

    
2381
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2382
        self._UpdateNodeInstances(node_i, nresult, nimg)
2383
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2384
        self._UpdateNodeOS(node_i, nresult, nimg)
2385
        if not nimg.os_fail:
2386
          if refos_img is None:
2387
            refos_img = nimg
2388
          self._VerifyNodeOS(node_i, nimg, refos_img)
2389

    
2390
    feedback_fn("* Verifying instance status")
2391
    for instance in instancelist:
2392
      if verbose:
2393
        feedback_fn("* Verifying instance %s" % instance)
2394
      inst_config = instanceinfo[instance]
2395
      self._VerifyInstance(instance, inst_config, node_image,
2396
                           instdisk[instance])
2397
      inst_nodes_offline = []
2398

    
2399
      pnode = inst_config.primary_node
2400
      pnode_img = node_image[pnode]
2401
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2402
               self.ENODERPC, pnode, "instance %s, connection to"
2403
               " primary node failed", instance)
2404

    
2405
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2406
               self.EINSTANCEBADNODE, instance,
2407
               "instance is marked as running and lives on offline node %s",
2408
               inst_config.primary_node)
2409

    
2410
      # If the instance is non-redundant we cannot survive losing its primary
2411
      # node, so we are not N+1 compliant. On the other hand we have no disk
2412
      # templates with more than one secondary so that situation is not well
2413
      # supported either.
2414
      # FIXME: does not support file-backed instances
2415
      if not inst_config.secondary_nodes:
2416
        i_non_redundant.append(instance)
2417

    
2418
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2419
               instance, "instance has multiple secondary nodes: %s",
2420
               utils.CommaJoin(inst_config.secondary_nodes),
2421
               code=self.ETYPE_WARNING)
2422

    
2423
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2424
        pnode = inst_config.primary_node
2425
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2426
        instance_groups = {}
2427

    
2428
        for node in instance_nodes:
2429
          instance_groups.setdefault(nodeinfo_byname[node].group,
2430
                                     []).append(node)
2431

    
2432
        pretty_list = [
2433
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2434
          # Sort so that we always list the primary node first.
2435
          for group, nodes in sorted(instance_groups.items(),
2436
                                     key=lambda (_, nodes): pnode in nodes,
2437
                                     reverse=True)]
2438

    
2439
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2440
                      instance, "instance has primary and secondary nodes in"
2441
                      " different groups: %s", utils.CommaJoin(pretty_list),
2442
                      code=self.ETYPE_WARNING)
2443

    
2444
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2445
        i_non_a_balanced.append(instance)
2446

    
2447
      for snode in inst_config.secondary_nodes:
2448
        s_img = node_image[snode]
2449
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2450
                 "instance %s, connection to secondary node failed", instance)
2451

    
2452
        if s_img.offline:
2453
          inst_nodes_offline.append(snode)
2454

    
2455
      # warn that the instance lives on offline nodes
2456
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2457
               "instance has offline secondary node(s) %s",
2458
               utils.CommaJoin(inst_nodes_offline))
2459
      # ... or ghost/non-vm_capable nodes
2460
      for node in inst_config.all_nodes:
2461
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2462
                 "instance lives on ghost node %s", node)
2463
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2464
                 instance, "instance lives on non-vm_capable node %s", node)
2465

    
2466
    feedback_fn("* Verifying orphan volumes")
2467
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2468
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2469

    
2470
    feedback_fn("* Verifying orphan instances")
2471
    self._VerifyOrphanInstances(instancelist, node_image)
2472

    
2473
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2474
      feedback_fn("* Verifying N+1 Memory redundancy")
2475
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2476

    
2477
    feedback_fn("* Other Notes")
2478
    if i_non_redundant:
2479
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2480
                  % len(i_non_redundant))
2481

    
2482
    if i_non_a_balanced:
2483
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2484
                  % len(i_non_a_balanced))
2485

    
2486
    if n_offline:
2487
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2488

    
2489
    if n_drained:
2490
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2491

    
2492
    return not self.bad
2493

    
2494
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2495
    """Analyze the post-hooks' result
2496

2497
    This method analyses the hook result, handles it, and sends some
2498
    nicely-formatted feedback back to the user.
2499

2500
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2501
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2502
    @param hooks_results: the results of the multi-node hooks rpc call
2503
    @param feedback_fn: function used to send feedback back to the caller
2504
    @param lu_result: previous Exec result
2505
    @return: the new Exec result, based on the previous result
2506
        and hook results
2507

2508
    """
2509
    # We only really run POST phase hooks, and are only interested in
2510
    # their results
2511
    if phase == constants.HOOKS_PHASE_POST:
2512
      # Used to change hooks' output to proper indentation
2513
      feedback_fn("* Hooks Results")
2514
      assert hooks_results, "invalid result from hooks"
2515

    
2516
      for node_name in hooks_results:
2517
        res = hooks_results[node_name]
2518
        msg = res.fail_msg
2519
        test = msg and not res.offline
2520
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2521
                      "Communication failure in hooks execution: %s", msg)
2522
        if res.offline or msg:
2523
          # No need to investigate payload if node is offline or gave an error.
2524
          # override manually lu_result here as _ErrorIf only
2525
          # overrides self.bad
2526
          lu_result = 1
2527
          continue
2528
        for script, hkr, output in res.payload:
2529
          test = hkr == constants.HKR_FAIL
2530
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2531
                        "Script %s failed, output:", script)
2532
          if test:
2533
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2534
            feedback_fn("%s" % output)
2535
            lu_result = 0
2536

    
2537
      return lu_result
2538

    
2539

    
2540
class LUClusterVerifyDisks(NoHooksLU):
2541
  """Verifies the cluster disks status.
2542

2543
  """
2544
  REQ_BGL = False
2545

    
2546
  def ExpandNames(self):
2547
    self.needed_locks = {
2548
      locking.LEVEL_NODE: locking.ALL_SET,
2549
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2550
    }
2551
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2552

    
2553
  def Exec(self, feedback_fn):
2554
    """Verify integrity of cluster disks.
2555

2556
    @rtype: tuple of three items
2557
    @return: a tuple of (dict of node-to-node_error, list of instances
2558
        which need activate-disks, dict of instance: (node, volume) for
2559
        missing volumes)
2560

2561
    """
2562
    result = res_nodes, res_instances, res_missing = {}, [], {}
2563

    
2564
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2565
    instances = self.cfg.GetAllInstancesInfo().values()
2566

    
2567
    nv_dict = {}
2568
    for inst in instances:
2569
      inst_lvs = {}
2570
      if not inst.admin_up:
2571
        continue
2572
      inst.MapLVsByNode(inst_lvs)
2573
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2574
      for node, vol_list in inst_lvs.iteritems():
2575
        for vol in vol_list:
2576
          nv_dict[(node, vol)] = inst
2577

    
2578
    if not nv_dict:
2579
      return result
2580

    
2581
    node_lvs = self.rpc.call_lv_list(nodes, [])
2582
    for node, node_res in node_lvs.items():
2583
      if node_res.offline:
2584
        continue
2585
      msg = node_res.fail_msg
2586
      if msg:
2587
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2588
        res_nodes[node] = msg
2589
        continue
2590

    
2591
      lvs = node_res.payload
2592
      for lv_name, (_, _, lv_online) in lvs.items():
2593
        inst = nv_dict.pop((node, lv_name), None)
2594
        if (not lv_online and inst is not None
2595
            and inst.name not in res_instances):
2596
          res_instances.append(inst.name)
2597

    
2598
    # any leftover items in nv_dict are missing LVs, let's arrange the
2599
    # data better
2600
    for key, inst in nv_dict.iteritems():
2601
      if inst.name not in res_missing:
2602
        res_missing[inst.name] = []
2603
      res_missing[inst.name].append(key)
2604

    
2605
    return result
2606

    
2607

    
2608
class LUClusterRepairDiskSizes(NoHooksLU):
2609
  """Verifies the cluster disks sizes.
2610

2611
  """
2612
  REQ_BGL = False
2613

    
2614
  def ExpandNames(self):
2615
    if self.op.instances:
2616
      self.wanted_names = []
2617
      for name in self.op.instances:
2618
        full_name = _ExpandInstanceName(self.cfg, name)
2619
        self.wanted_names.append(full_name)
2620
      self.needed_locks = {
2621
        locking.LEVEL_NODE: [],
2622
        locking.LEVEL_INSTANCE: self.wanted_names,
2623
        }
2624
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2625
    else:
2626
      self.wanted_names = None
2627
      self.needed_locks = {
2628
        locking.LEVEL_NODE: locking.ALL_SET,
2629
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2630
        }
2631
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2632

    
2633
  def DeclareLocks(self, level):
2634
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2635
      self._LockInstancesNodes(primary_only=True)
2636

    
2637
  def CheckPrereq(self):
2638
    """Check prerequisites.
2639

2640
    This only checks the optional instance list against the existing names.
2641

2642
    """
2643
    if self.wanted_names is None:
2644
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2645

    
2646
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2647
                             in self.wanted_names]
2648

    
2649
  def _EnsureChildSizes(self, disk):
2650
    """Ensure children of the disk have the needed disk size.
2651

2652
    This is valid mainly for DRBD8 and fixes an issue where the
2653
    children have smaller disk size.
2654

2655
    @param disk: an L{ganeti.objects.Disk} object
2656

2657
    """
2658
    if disk.dev_type == constants.LD_DRBD8:
2659
      assert disk.children, "Empty children for DRBD8?"
2660
      fchild = disk.children[0]
2661
      mismatch = fchild.size < disk.size
2662
      if mismatch:
2663
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2664
                     fchild.size, disk.size)
2665
        fchild.size = disk.size
2666

    
2667
      # and we recurse on this child only, not on the metadev
2668
      return self._EnsureChildSizes(fchild) or mismatch
2669
    else:
2670
      return False
2671

    
2672
  def Exec(self, feedback_fn):
2673
    """Verify the size of cluster disks.
2674

2675
    """
2676
    # TODO: check child disks too
2677
    # TODO: check differences in size between primary/secondary nodes
2678
    per_node_disks = {}
2679
    for instance in self.wanted_instances:
2680
      pnode = instance.primary_node
2681
      if pnode not in per_node_disks:
2682
        per_node_disks[pnode] = []
2683
      for idx, disk in enumerate(instance.disks):
2684
        per_node_disks[pnode].append((instance, idx, disk))
2685

    
2686
    changed = []
2687
    for node, dskl in per_node_disks.items():
2688
      newl = [v[2].Copy() for v in dskl]
2689
      for dsk in newl:
2690
        self.cfg.SetDiskID(dsk, node)
2691
      result = self.rpc.call_blockdev_getsize(node, newl)
2692
      if result.fail_msg:
2693
        self.LogWarning("Failure in blockdev_getsize call to node"
2694
                        " %s, ignoring", node)
2695
        continue
2696
      if len(result.payload) != len(dskl):
2697
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2698
                        " result.payload=%s", node, len(dskl), result.payload)
2699
        self.LogWarning("Invalid result from node %s, ignoring node results",
2700
                        node)
2701
        continue
2702
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
2703
        if size is None:
2704
          self.LogWarning("Disk %d of instance %s did not return size"
2705
                          " information, ignoring", idx, instance.name)
2706
          continue
2707
        if not isinstance(size, (int, long)):
2708
          self.LogWarning("Disk %d of instance %s did not return valid"
2709
                          " size information, ignoring", idx, instance.name)
2710
          continue
2711
        size = size >> 20
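        # the node reports the size in bytes, while disk.size in the
        # configuration is kept in MiB, hence the conversion above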
2712
        if size != disk.size:
2713
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2714
                       " correcting: recorded %d, actual %d", idx,
2715
                       instance.name, disk.size, size)
2716
          disk.size = size
2717
          self.cfg.Update(instance, feedback_fn)
2718
          changed.append((instance.name, idx, size))
2719
        if self._EnsureChildSizes(disk):
2720
          self.cfg.Update(instance, feedback_fn)
2721
          changed.append((instance.name, idx, disk.size))
2722
    return changed
2723

    
2724

    
2725
class LUClusterRename(LogicalUnit):
2726
  """Rename the cluster.
2727

2728
  """
2729
  HPATH = "cluster-rename"
2730
  HTYPE = constants.HTYPE_CLUSTER
2731

    
2732
  def BuildHooksEnv(self):
2733
    """Build hooks env.
2734

2735
    """
2736
    return {
2737
      "OP_TARGET": self.cfg.GetClusterName(),
2738
      "NEW_NAME": self.op.name,
2739
      }
2740

    
2741
  def BuildHooksNodes(self):
2742
    """Build hooks nodes.
2743

2744
    """
2745
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2746

    
2747
  def CheckPrereq(self):
2748
    """Verify that the passed name is a valid one.
2749

2750
    """
2751
    hostname = netutils.GetHostname(name=self.op.name,
2752
                                    family=self.cfg.GetPrimaryIPFamily())
2753

    
2754
    new_name = hostname.name
2755
    self.ip = new_ip = hostname.ip
2756
    old_name = self.cfg.GetClusterName()
2757
    old_ip = self.cfg.GetMasterIP()
2758
    if new_name == old_name and new_ip == old_ip:
2759
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2760
                                 " cluster has changed",
2761
                                 errors.ECODE_INVAL)
2762
    if new_ip != old_ip:
2763
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2764
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2765
                                   " reachable on the network" %
2766
                                   new_ip, errors.ECODE_NOTUNIQUE)
2767

    
2768
    self.op.name = new_name
2769

    
2770
  def Exec(self, feedback_fn):
2771
    """Rename the cluster.
2772

2773
    """
2774
    clustername = self.op.name
2775
    ip = self.ip
2776

    
2777
    # shutdown the master IP
2778
    master = self.cfg.GetMasterNode()
2779
    result = self.rpc.call_node_stop_master(master, False)
2780
    result.Raise("Could not disable the master role")
2781

    
2782
    try:
2783
      cluster = self.cfg.GetClusterInfo()
2784
      cluster.cluster_name = clustername
2785
      cluster.master_ip = ip
2786
      self.cfg.Update(cluster, feedback_fn)
2787

    
2788
      # update the known hosts file
2789
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2790
      node_list = self.cfg.GetOnlineNodeList()
2791
      try:
2792
        node_list.remove(master)
2793
      except ValueError:
2794
        pass
2795
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2796
    finally:
2797
      result = self.rpc.call_node_start_master(master, False, False)
2798
      msg = result.fail_msg
2799
      if msg:
2800
        self.LogWarning("Could not re-enable the master role on"
2801
                        " the master, please restart manually: %s", msg)
2802

    
2803
    return clustername
2804

    
2805

    
2806
class LUClusterSetParams(LogicalUnit):
2807
  """Change the parameters of the cluster.
2808

2809
  """
2810
  HPATH = "cluster-modify"
2811
  HTYPE = constants.HTYPE_CLUSTER
2812
  REQ_BGL = False
2813

    
2814
  def CheckArguments(self):
2815
    """Check parameters
2816

2817
    """
2818
    if self.op.uid_pool:
2819
      uidpool.CheckUidPool(self.op.uid_pool)
2820

    
2821
    if self.op.add_uids:
2822
      uidpool.CheckUidPool(self.op.add_uids)
2823

    
2824
    if self.op.remove_uids:
2825
      uidpool.CheckUidPool(self.op.remove_uids)
2826

    
2827
  def ExpandNames(self):
2828
    # FIXME: in the future maybe other cluster params won't require checking on
2829
    # all nodes to be modified.
2830
    self.needed_locks = {
2831
      locking.LEVEL_NODE: locking.ALL_SET,
2832
    }
2833
    self.share_locks[locking.LEVEL_NODE] = 1
2834

    
2835
  def BuildHooksEnv(self):
2836
    """Build hooks env.
2837

2838
    """
2839
    return {
2840
      "OP_TARGET": self.cfg.GetClusterName(),
2841
      "NEW_VG_NAME": self.op.vg_name,
2842
      }
2843

    
2844
  def BuildHooksNodes(self):
2845
    """Build hooks nodes.
2846

2847
    """
2848
    mn = self.cfg.GetMasterNode()
2849
    return ([mn], [mn])
2850

    
2851
  def CheckPrereq(self):
2852
    """Check prerequisites.
2853

2854
    This checks whether the given params don't conflict and
2855
    if the given volume group is valid.
2856

2857
    """
2858
    if self.op.vg_name is not None and not self.op.vg_name:
2859
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2860
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2861
                                   " instances exist", errors.ECODE_INVAL)
2862

    
2863
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2864
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2865
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2866
                                   " drbd-based instances exist",
2867
                                   errors.ECODE_INVAL)
2868

    
2869
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2870

    
2871
    # if vg_name not None, checks given volume group on all nodes
2872
    if self.op.vg_name:
2873
      vglist = self.rpc.call_vg_list(node_list)
2874
      for node in node_list:
2875
        msg = vglist[node].fail_msg
2876
        if msg:
2877
          # ignoring down node
2878
          self.LogWarning("Error while gathering data on node %s"
2879
                          " (ignoring node): %s", node, msg)
2880
          continue
2881
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2882
                                              self.op.vg_name,
2883
                                              constants.MIN_VG_SIZE)
2884
        if vgstatus:
2885
          raise errors.OpPrereqError("Error on node '%s': %s" %
2886
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2887

    
2888
    if self.op.drbd_helper:
2889
      # checks given drbd helper on all nodes
2890
      helpers = self.rpc.call_drbd_helper(node_list)
2891
      for node in node_list:
2892
        ninfo = self.cfg.GetNodeInfo(node)
2893
        if ninfo.offline:
2894
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2895
          continue
2896
        msg = helpers[node].fail_msg
2897
        if msg:
2898
          raise errors.OpPrereqError("Error checking drbd helper on node"
2899
                                     " '%s': %s" % (node, msg),
2900
                                     errors.ECODE_ENVIRON)
2901
        node_helper = helpers[node].payload
2902
        if node_helper != self.op.drbd_helper:
2903
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2904
                                     (node, node_helper), errors.ECODE_ENVIRON)
2905

    
2906
    self.cluster = cluster = self.cfg.GetClusterInfo()
2907
    # validate params changes
2908
    if self.op.beparams:
2909
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2910
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2911

    
2912
    if self.op.ndparams:
2913
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2914
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2915

    
2916
      # TODO: we need a more general way to handle resetting
2917
      # cluster-level parameters to default values
2918
      if self.new_ndparams["oob_program"] == "":
2919
        self.new_ndparams["oob_program"] = \
2920
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2921

    
2922
    if self.op.nicparams:
2923
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2924
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2925
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2926
      nic_errors = []
2927

    
2928
      # check all instances for consistency
2929
      for instance in self.cfg.GetAllInstancesInfo().values():
2930
        for nic_idx, nic in enumerate(instance.nics):
2931
          params_copy = copy.deepcopy(nic.nicparams)
2932
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2933

    
2934
          # check parameter syntax
2935
          try:
2936
            objects.NIC.CheckParameterSyntax(params_filled)
2937
          except errors.ConfigurationError, err:
2938
            nic_errors.append("Instance %s, nic/%d: %s" %
2939
                              (instance.name, nic_idx, err))
2940

    
2941
          # if we're moving instances to routed, check that they have an ip
2942
          target_mode = params_filled[constants.NIC_MODE]
2943
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2944
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2945
                              (instance.name, nic_idx))
2946
      if nic_errors:
2947
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2948
                                   "\n".join(nic_errors))
2949

    
2950
    # hypervisor list/parameters
2951
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2952
    if self.op.hvparams:
2953
      for hv_name, hv_dict in self.op.hvparams.items():
2954
        if hv_name not in self.new_hvparams:
2955
          self.new_hvparams[hv_name] = hv_dict
2956
        else:
2957
          self.new_hvparams[hv_name].update(hv_dict)
2958

    
2959
    # os hypervisor parameters
2960
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2961
    if self.op.os_hvp:
2962
      for os_name, hvs in self.op.os_hvp.items():
2963
        if os_name not in self.new_os_hvp:
2964
          self.new_os_hvp[os_name] = hvs
2965
        else:
2966
          for hv_name, hv_dict in hvs.items():
2967
            if hv_name not in self.new_os_hvp[os_name]:
2968
              self.new_os_hvp[os_name][hv_name] = hv_dict
2969
            else:
2970
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2971

    
2972
    # os parameters
2973
    self.new_osp = objects.FillDict(cluster.osparams, {})
2974
    if self.op.osparams:
2975
      for os_name, osp in self.op.osparams.items():
2976
        if os_name not in self.new_osp:
2977
          self.new_osp[os_name] = {}
2978

    
2979
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2980
                                                  use_none=True)
2981

    
2982
        if not self.new_osp[os_name]:
2983
          # we removed all parameters
2984
          del self.new_osp[os_name]
2985
        else:
2986
          # check the parameter validity (remote check)
2987
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2988
                         os_name, self.new_osp[os_name])
2989

    
2990
    # changes to the hypervisor list
2991
    if self.op.enabled_hypervisors is not None:
2992
      self.hv_list = self.op.enabled_hypervisors
2993
      for hv in self.hv_list:
2994
        # if the hypervisor doesn't already exist in the cluster
2995
        # hvparams, we initialize it to empty, and then (in both
2996
        # cases) we make sure to fill the defaults, as we might not
2997
        # have a complete defaults list if the hypervisor wasn't
2998
        # enabled before
2999
        if hv not in new_hvp:
3000
          new_hvp[hv] = {}
3001
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3002
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3003
    else:
3004
      self.hv_list = cluster.enabled_hypervisors
3005

    
3006
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3007
      # either the enabled list or the parameters have changed; validate
3008
      for hv_name, hv_params in self.new_hvparams.items():
3009
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3010
            (self.op.enabled_hypervisors and
3011
             hv_name in self.op.enabled_hypervisors)):
3012
          # either this is a new hypervisor, or its parameters have changed
3013
          hv_class = hypervisor.GetHypervisor(hv_name)
3014
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3015
          hv_class.CheckParameterSyntax(hv_params)
3016
          _CheckHVParams(self, node_list, hv_name, hv_params)
3017

    
3018
    if self.op.os_hvp:
3019
      # no need to check any newly-enabled hypervisors, since the
3020
      # defaults have already been checked in the above code-block
3021
      for os_name, os_hvp in self.new_os_hvp.items():
3022
        for hv_name, hv_params in os_hvp.items():
3023
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3024
          # we need to fill in the new os_hvp on top of the actual hv_p
3025
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3026
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3027
          hv_class = hypervisor.GetHypervisor(hv_name)
3028
          hv_class.CheckParameterSyntax(new_osp)
3029
          _CheckHVParams(self, node_list, hv_name, new_osp)
3030

    
3031
    if self.op.default_iallocator:
3032
      alloc_script = utils.FindFile(self.op.default_iallocator,
3033
                                    constants.IALLOCATOR_SEARCH_PATH,
3034
                                    os.path.isfile)
3035
      if alloc_script is None:
3036
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3037
                                   " specified" % self.op.default_iallocator,
3038
                                   errors.ECODE_INVAL)
3039

    
3040
  def Exec(self, feedback_fn):
3041
    """Change the parameters of the cluster.
3042

3043
    """
3044
    if self.op.vg_name is not None:
3045
      new_volume = self.op.vg_name
3046
      if not new_volume:
3047
        new_volume = None
3048
      if new_volume != self.cfg.GetVGName():
3049
        self.cfg.SetVGName(new_volume)
3050
      else:
3051
        feedback_fn("Cluster LVM configuration already in desired"
3052
                    " state, not changing")
3053
    if self.op.drbd_helper is not None:
3054
      new_helper = self.op.drbd_helper
3055
      if not new_helper:
3056
        new_helper = None
3057
      if new_helper != self.cfg.GetDRBDHelper():
3058
        self.cfg.SetDRBDHelper(new_helper)
3059
      else:
3060
        feedback_fn("Cluster DRBD helper already in desired state,"
3061
                    " not changing")
3062
    if self.op.hvparams:
3063
      self.cluster.hvparams = self.new_hvparams
3064
    if self.op.os_hvp:
3065
      self.cluster.os_hvp = self.new_os_hvp
3066
    if self.op.enabled_hypervisors is not None:
3067
      self.cluster.hvparams = self.new_hvparams
3068
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3069
    if self.op.beparams:
3070
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3071
    if self.op.nicparams:
3072
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3073
    if self.op.osparams:
3074
      self.cluster.osparams = self.new_osp
3075
    if self.op.ndparams:
3076
      self.cluster.ndparams = self.new_ndparams
3077

    
3078
    if self.op.candidate_pool_size is not None:
3079
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3080
      # we need to update the pool size here, otherwise the save will fail
3081
      _AdjustCandidatePool(self, [])
3082

    
3083
    if self.op.maintain_node_health is not None:
3084
      self.cluster.maintain_node_health = self.op.maintain_node_health
3085

    
3086
    if self.op.prealloc_wipe_disks is not None:
3087
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3088

    
3089
    if self.op.add_uids is not None:
3090
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3091

    
3092
    if self.op.remove_uids is not None:
3093
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3094

    
3095
    if self.op.uid_pool is not None:
3096
      self.cluster.uid_pool = self.op.uid_pool
3097

    
3098
    if self.op.default_iallocator is not None:
3099
      self.cluster.default_iallocator = self.op.default_iallocator
3100

    
3101
    if self.op.reserved_lvs is not None:
3102
      self.cluster.reserved_lvs = self.op.reserved_lvs
3103

    
3104
    def helper_os(aname, mods, desc):
3105
      desc += " OS list"
3106
      lst = getattr(self.cluster, aname)
3107
      for key, val in mods:
3108
        if key == constants.DDM_ADD:
3109
          if val in lst:
3110
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3111
          else:
3112
            lst.append(val)
3113
        elif key == constants.DDM_REMOVE:
3114
          if val in lst:
3115
            lst.remove(val)
3116
          else:
3117
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3118
        else:
3119
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3120

    
3121
    if self.op.hidden_os:
3122
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3123

    
3124
    if self.op.blacklisted_os:
3125
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3126

    
3127
    if self.op.master_netdev:
3128
      master = self.cfg.GetMasterNode()
3129
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3130
                  self.cluster.master_netdev)
3131
      result = self.rpc.call_node_stop_master(master, False)
3132
      result.Raise("Could not disable the master ip")
3133
      feedback_fn("Changing master_netdev from %s to %s" %
3134
                  (self.cluster.master_netdev, self.op.master_netdev))
3135
      self.cluster.master_netdev = self.op.master_netdev
3136

    
3137
    self.cfg.Update(self.cluster, feedback_fn)
3138

    
3139
    if self.op.master_netdev:
3140
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3141
                  self.op.master_netdev)
3142
      result = self.rpc.call_node_start_master(master, False, False)
3143
      if result.fail_msg:
3144
        self.LogWarning("Could not re-enable the master ip on"
3145
                        " the master, please restart manually: %s",
3146
                        result.fail_msg)
3147

    
3148

    
3149
def _UploadHelper(lu, nodes, fname):
3150
  """Helper for uploading a file and showing warnings.
3151

3152
  """
3153
  if os.path.exists(fname):
3154
    result = lu.rpc.call_upload_file(nodes, fname)
3155
    for to_node, to_result in result.items():
3156
      msg = to_result.fail_msg
3157
      if msg:
3158
        msg = ("Copy of file %s to node %s failed: %s" %
3159
               (fname, to_node, msg))
3160
        lu.proc.LogWarning(msg)
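# Illustrative note (not part of the original code): files which do not exist
# locally are skipped silently, so callers may pass optional files
# unconditionally, e.g.:
#   _UploadHelper(lu, lu.cfg.GetOnlineNodeList(), constants.RAPI_USERS_FILE)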
3161

    
3162

    
3163
def _ComputeAncillaryFiles(cluster, redist):
3164
  """Compute files external to Ganeti which need to be consistent.
3165

3166
  @type redist: boolean
3167
  @param redist: Whether to include files which need to be redistributed
3168

3169
  """
3170
  # Compute files for all nodes
3171
  files_all = set([
3172
    constants.SSH_KNOWN_HOSTS_FILE,
3173
    constants.CONFD_HMAC_KEY,
3174
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3175
    ])
3176

    
3177
  if not redist:
3178
    files_all.update(constants.ALL_CERT_FILES)
3179
    files_all.update(ssconf.SimpleStore().GetFileList())
3180

    
3181
  if cluster.modify_etc_hosts:
3182
    files_all.add(constants.ETC_HOSTS)
3183

    
3184
  # Files which must either exist on all nodes or on none
3185
  files_all_opt = set([
3186
    constants.RAPI_USERS_FILE,
3187
    ])
3188

    
3189
  # Files which should only be on master candidates
3190
  files_mc = set()
3191
  if not redist:
3192
    files_mc.add(constants.CLUSTER_CONF_FILE)
3193

    
3194
  # Files which should only be on VM-capable nodes
3195
  files_vm = set(filename
3196
    for hv_name in cluster.enabled_hypervisors
3197
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3198

    
3199
  # Filenames must be unique
3200
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3201
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3202
         "Found file listed in more than one file list"
3203

    
3204
  return (files_all, files_all_opt, files_mc, files_vm)
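# Illustrative sketch (not part of the original code) of how the result is
# typically unpacked; variable names are arbitrary:
#   (files_all, files_all_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
# With redist=True the certificate and ssconf file lists are left out (see the
# "if not redist" block above); with redist=False the full set is returned,
# e.g. for verification.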
3205

    
3206

    
3207
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3208
  """Distribute additional files which are part of the cluster configuration.
3209

3210
  ConfigWriter takes care of distributing the config and ssconf files, but
3211
  there are more files which should be distributed to all nodes. This function
3212
  makes sure those are copied.
3213

3214
  @param lu: calling logical unit
3215
  @param additional_nodes: list of nodes not in the config to distribute to
3216
  @type additional_vm: boolean
3217
  @param additional_vm: whether the additional nodes are vm-capable or not
3218

3219
  """
3220
  # Gather target nodes
3221
  cluster = lu.cfg.GetClusterInfo()
3222
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3223

    
3224
  online_nodes = lu.cfg.GetOnlineNodeList()
3225
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3226

    
3227
  if additional_nodes is not None:
3228
    online_nodes.extend(additional_nodes)
3229
    if additional_vm:
3230
      vm_nodes.extend(additional_nodes)
3231

    
3232
  # Never distribute to master node
3233
  for nodelist in [online_nodes, vm_nodes]:
3234
    if master_info.name in nodelist:
3235
      nodelist.remove(master_info.name)
3236

    
3237
  # Gather file lists
3238
  (files_all, files_all_opt, files_mc, files_vm) = \
3239
    _ComputeAncillaryFiles(cluster, True)
3240

    
3241
  # Never re-distribute configuration file from here
3242
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3243
              constants.CLUSTER_CONF_FILE in files_vm)
3244
  assert not files_mc, "Master candidates not handled in this function"
3245

    
3246
  filemap = [
3247
    (online_nodes, files_all),
3248
    (online_nodes, files_all_opt),
3249
    (vm_nodes, files_vm),
3250
    ]
3251

    
3252
  # Upload the files
3253
  for (node_list, files) in filemap:
3254
    for fname in files:
3255
      _UploadHelper(lu, node_list, fname)
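# Illustrative call sketch (not part of the original code):
#   _RedistributeAncillaryFiles(lu)
# redistributes to all online nodes in the configuration, while
#   _RedistributeAncillaryFiles(lu, additional_nodes=[new_node_name],
#                               additional_vm=False)
# also pushes the files to a node not yet in the configuration;
# new_node_name is a hypothetical variable.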
3256

    
3257

    
3258
class LUClusterRedistConf(NoHooksLU):
3259
  """Force the redistribution of cluster configuration.
3260

3261
  This is a very simple LU.
3262

3263
  """
3264
  REQ_BGL = False
3265

    
3266
  def ExpandNames(self):
3267
    self.needed_locks = {
3268
      locking.LEVEL_NODE: locking.ALL_SET,
3269
    }
3270
    self.share_locks[locking.LEVEL_NODE] = 1
3271

    
3272
  def Exec(self, feedback_fn):
3273
    """Redistribute the configuration.
3274

3275
    """
3276
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3277
    _RedistributeAncillaryFiles(self)
3278

    
3279

    
3280
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3281
  """Sleep and poll for an instance's disk to sync.
3282

3283
  """
3284
  if not instance.disks or (disks is not None and not disks):
3285
    return True
3286

    
3287
  disks = _ExpandCheckDisks(instance, disks)
3288

    
3289
  if not oneshot:
3290
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3291

    
3292
  node = instance.primary_node
3293

    
3294
  for dev in disks:
3295
    lu.cfg.SetDiskID(dev, node)
3296

    
3297
  # TODO: Convert to utils.Retry
3298

    
3299
  retries = 0
3300
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3301
  while True:
3302
    max_time = 0
3303
    done = True
3304
    cumul_degraded = False
3305
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3306
    msg = rstats.fail_msg
3307
    if msg:
3308
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3309
      retries += 1
3310
      if retries >= 10:
3311
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3312
                                 " aborting." % node)
3313
      time.sleep(6)
3314
      continue
3315
    rstats = rstats.payload
3316
    retries = 0
3317
    for i, mstat in enumerate(rstats):
3318
      if mstat is None:
3319
        lu.LogWarning("Can't compute data for node %s/%s",
3320
                      node, disks[i].iv_name)
3321
        continue
3322

    
3323
      cumul_degraded = (cumul_degraded or
3324
                        (mstat.is_degraded and mstat.sync_percent is None))
3325
      if mstat.sync_percent is not None:
3326
        done = False
3327
        if mstat.estimated_time is not None:
3328
          rem_time = ("%s remaining (estimated)" %
3329
                      utils.FormatSeconds(mstat.estimated_time))
3330
          max_time = mstat.estimated_time
3331
        else:
3332
          rem_time = "no time estimate"
3333
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3334
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3335

    
3336
    # if we're done but degraded, let's do a few small retries, to
3337
    # make sure we see a stable and not transient situation; therefore
3338
    # we force restart of the loop
3339
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3340
      logging.info("Degraded disks found, %d retries left", degr_retries)
3341
      degr_retries -= 1
3342
      time.sleep(1)
3343
      continue
3344

    
3345
    if done or oneshot:
3346
      break
3347

    
3348
    time.sleep(min(60, max_time))
3349

    
3350
  if done:
3351
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3352
  return not cumul_degraded
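# Illustrative usage sketch (not part of the original code): a LU that has
# just created mirrored disks would typically do something like
#   disk_abort = not _WaitForSync(lu, instance)
#   if disk_abort:
#     lu.LogWarning("Disk syncing has not returned a good status, please"
#                   " check manually")
# i.e. a False return value means at least one mirror stayed degraded.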
3353

    
3354

    
3355
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3356
  """Check that mirrors are not degraded.
3357

3358
  The ldisk parameter, if True, will change the test from the
3359
  is_degraded attribute (which represents overall non-ok status for
3360
  the device(s)) to the ldisk (representing the local storage status).
3361

3362
  """
3363
  lu.cfg.SetDiskID(dev, node)
3364

    
3365
  result = True
3366

    
3367
  if on_primary or dev.AssembleOnSecondary():
3368
    rstats = lu.rpc.call_blockdev_find(node, dev)
3369
    msg = rstats.fail_msg
3370
    if msg:
3371
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3372
      result = False
3373
    elif not rstats.payload:
3374
      lu.LogWarning("Can't find disk on node %s", node)
3375
      result = False
3376
    else:
3377
      if ldisk:
3378
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3379
      else:
3380
        result = result and not rstats.payload.is_degraded
3381

    
3382
  if dev.children:
3383
    for child in dev.children:
3384
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3385

    
3386
  return result
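# Illustrative sketch (not part of the original code): a disk-replacement
# path would usually check the local-storage status on the surviving node,
# e.g.
#   if not _CheckDiskConsistency(lu, dev, other_node, False, ldisk=True):
#     raise errors.OpExecError("Disk %s is degraded on node %s" %
#                              (dev.iv_name, other_node))
# where other_node is a hypothetical variable; with ldisk=False the overall
# is_degraded status is tested instead.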
3387

    
3388

    
3389
class LUOobCommand(NoHooksLU):
3390
  """Logical unit for OOB handling.
3391

3392
  """
3393
  REQ_BGL = False
3394
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3395

    
3396
  def CheckPrereq(self):
3397
    """Check prerequisites.
3398

3399
    This checks:
3400
     - the node exists in the configuration
3401
     - OOB is supported
3402

3403
    Any errors are signaled by raising errors.OpPrereqError.
3404

3405
    """
3406
    self.nodes = []
3407
    self.master_node = self.cfg.GetMasterNode()
3408

    
3409
    assert self.op.power_delay >= 0.0
3410

    
3411
    if self.op.node_names:
3412
      if self.op.command in self._SKIP_MASTER:
3413
        if self.master_node in self.op.node_names:
3414
          master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3415
          master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3416

    
3417
          if master_oob_handler:
3418
            additional_text = ("Run '%s %s %s' if you want to operate on the"
3419
                               " master regardless") % (master_oob_handler,
3420
                                                        self.op.command,
3421
                                                        self.master_node)
3422
          else:
3423
            additional_text = "The master node does not support out-of-band"
3424

    
3425
          raise errors.OpPrereqError(("Operating on the master node %s is not"
3426
                                      " allowed for %s\n%s") %
3427
                                     (self.master_node, self.op.command,
3428
                                      additional_text), errors.ECODE_INVAL)
3429
    else:
3430
      self.op.node_names = self.cfg.GetNodeList()
3431
      if self.op.command in self._SKIP_MASTER:
3432
        self.op.node_names.remove(self.master_node)
3433

    
3434
    if self.op.command in self._SKIP_MASTER:
3435
      assert self.master_node not in self.op.node_names
3436

    
3437
    for node_name in self.op.node_names:
3438
      node = self.cfg.GetNodeInfo(node_name)
3439

    
3440
      if node is None:
3441
        raise errors.OpPrereqError("Node %s not found" % node_name,
3442
                                   errors.ECODE_NOENT)
3443
      else:
3444
        self.nodes.append(node)
3445

    
3446
      if (not self.op.ignore_status and
3447
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3448
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3449
                                    " not marked offline") % node_name,
3450
                                   errors.ECODE_STATE)
3451

    
3452
  def ExpandNames(self):
3453
    """Gather locks we need.
3454

3455
    """
3456
    if self.op.node_names:
3457
      self.op.node_names = [_ExpandNodeName(self.cfg, name)
3458
                            for name in self.op.node_names]
3459
      lock_names = self.op.node_names
3460
    else:
3461
      lock_names = locking.ALL_SET
3462

    
3463
    self.needed_locks = {
3464
      locking.LEVEL_NODE: lock_names,
3465
      }
3466

    
3467
  def Exec(self, feedback_fn):
3468
    """Execute OOB and return result if we expect any.
3469

3470
    """
3471
    master_node = self.master_node
3472
    ret = []
3473

    
3474
    for idx, node in enumerate(self.nodes):
3475
      node_entry = [(constants.RS_NORMAL, node.name)]
3476
      ret.append(node_entry)
3477

    
3478
      oob_program = _SupportsOob(self.cfg, node)
3479

    
3480
      if not oob_program:
3481
        node_entry.append((constants.RS_UNAVAIL, None))
3482
        continue
3483

    
3484
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3485
                   self.op.command, oob_program, node.name)
3486
      result = self.rpc.call_run_oob(master_node, oob_program,
3487
                                     self.op.command, node.name,
3488
                                     self.op.timeout)
3489

    
3490
      if result.fail_msg:
3491
        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3492
                        node.name, result.fail_msg)
3493
        node_entry.append((constants.RS_NODATA, None))
3494
      else:
3495
        try:
3496
          self._CheckPayload(result)
3497
        except errors.OpExecError, err:
3498
          self.LogWarning("The payload returned by '%s' is not valid: %s",
3499
                          node.name, err)
3500
          node_entry.append((constants.RS_NODATA, None))
3501
        else:
3502
          if self.op.command == constants.OOB_HEALTH:
3503
            # For health we should log important events
3504
            for item, status in result.payload:
3505
              if status in [constants.OOB_STATUS_WARNING,
3506
                            constants.OOB_STATUS_CRITICAL]:
3507
                self.LogWarning("On node '%s' item '%s' has status '%s'",
3508
                                node.name, item, status)
3509

    
3510
          if self.op.command == constants.OOB_POWER_ON:
3511
            node.powered = True
3512
          elif self.op.command == constants.OOB_POWER_OFF:
3513
            node.powered = False
3514
          elif self.op.command == constants.OOB_POWER_STATUS:
3515
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3516
            if powered != node.powered:
3517
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3518
                               " match actual power state (%s)"), node.powered,
3519
                              node.name, powered)
3520

    
3521
          # For configuration changing commands we should update the node
3522
          if self.op.command in (constants.OOB_POWER_ON,
3523
                                 constants.OOB_POWER_OFF):
3524
            self.cfg.Update(node, feedback_fn)
3525

    
3526
          node_entry.append((constants.RS_NORMAL, result.payload))
3527

    
3528
          if (self.op.command == constants.OOB_POWER_ON and
3529
              idx < len(self.nodes) - 1):
3530
            time.sleep(self.op.power_delay)
3531

    
3532
    return ret
3533

    
3534
  def _CheckPayload(self, result):
3535
    """Checks if the payload is valid.
3536

3537
    @param result: RPC result
3538
    @raises errors.OpExecError: If payload is not valid
3539

3540
    """
3541
    errs = []
3542
    if self.op.command == constants.OOB_HEALTH:
3543
      if not isinstance(result.payload, list):
3544
        errs.append("command 'health' is expected to return a list but got %s" %
3545
                    type(result.payload))
3546
      else:
3547
        for item, status in result.payload:
3548
          if status not in constants.OOB_STATUSES:
3549
            errs.append("health item '%s' has invalid status '%s'" %
3550
                        (item, status))
3551

    
3552
    if self.op.command == constants.OOB_POWER_STATUS:
3553
      if not isinstance(result.payload, dict):
3554
        errs.append("power-status is expected to return a dict but got %s" %
3555
                    type(result.payload))
3556

    
3557
    if self.op.command in [
3558
        constants.OOB_POWER_ON,
3559
        constants.OOB_POWER_OFF,
3560
        constants.OOB_POWER_CYCLE,
3561
        ]:
3562
      if result.payload is not None:
3563
        errs.append("%s is expected to not return payload but got '%s'" %
3564
                    (self.op.command, result.payload))
3565

    
3566
    if errs:
3567
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3568
                               utils.CommaJoin(errs))
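  # Illustrative payload shapes accepted above (sketch, not part of the
  # original code):
  #   OOB_HEALTH:       a list of [item, status] pairs, with each status
  #                     drawn from constants.OOB_STATUSES
  #   OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True}
  #   OOB_POWER_ON/OFF/CYCLE: None (any other payload is rejected)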
3569

    
3570
class _OsQuery(_QueryBase):
3571
  FIELDS = query.OS_FIELDS
3572

    
3573
  def ExpandNames(self, lu):
3574
    # Lock all nodes in shared mode
3575
    # Temporary removal of locks, should be reverted later
3576
    # TODO: reintroduce locks when they are lighter-weight
3577
    lu.needed_locks = {}
3578
    #self.share_locks[locking.LEVEL_NODE] = 1
3579
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3580

    
3581
    # The following variables interact with _QueryBase._GetNames
3582
    if self.names:
3583
      self.wanted = self.names
3584
    else:
3585
      self.wanted = locking.ALL_SET
3586

    
3587
    self.do_locking = self.use_locking
3588

    
3589
  def DeclareLocks(self, lu, level):
3590
    pass
3591

    
3592
  @staticmethod
3593
  def _DiagnoseByOS(rlist):
3594
    """Remaps a per-node return list into an a per-os per-node dictionary
3595

3596
    @param rlist: a map with node names as keys and OS objects as values
3597

3598
    @rtype: dict
3599
    @return: a dictionary with osnames as keys and as value another
3600
        map, with nodes as keys and tuples of (path, status, diagnose,
3601
        variants, parameters, api_versions) as values, eg::
3602

3603
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3604
                                     (/srv/..., False, "invalid api")],
3605
                           "node2": [(/srv/..., True, "", [], [])]}
3606
          }
3607

3608
    """
3609
    all_os = {}
3610
    # we build here the list of nodes that didn't fail the RPC (at RPC
3611
    # level), so that nodes with a non-responding node daemon don't
3612
    # make all OSes invalid
3613
    good_nodes = [node_name for node_name in rlist
3614
                  if not rlist[node_name].fail_msg]
3615
    for node_name, nr in rlist.items():
3616
      if nr.fail_msg or not nr.payload:
3617
        continue
3618
      for (name, path, status, diagnose, variants,
3619
           params, api_versions) in nr.payload:
3620
        if name not in all_os:
3621
          # build a list of nodes for this os containing empty lists
3622
          # for each node in node_list
3623
          all_os[name] = {}
3624
          for nname in good_nodes:
3625
            all_os[name][nname] = []
3626
        # convert params from [name, help] to (name, help)
3627
        params = [tuple(v) for v in params]
3628
        all_os[name][node_name].append((path, status, diagnose,
3629
                                        variants, params, api_versions))
3630
    return all_os
3631

    
3632
  def _GetQueryData(self, lu):
3633
    """Computes the list of nodes and their attributes.
3634

3635
    """
3636
    # Locking is not used
3637
    assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3638

    
3639
    valid_nodes = [node.name
3640
                   for node in lu.cfg.GetAllNodesInfo().values()
3641
                   if not node.offline and node.vm_capable]
3642
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3643
    cluster = lu.cfg.GetClusterInfo()
3644

    
3645
    data = {}
3646

    
3647
    for (os_name, os_data) in pol.items():
3648
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3649
                          hidden=(os_name in cluster.hidden_os),
3650
                          blacklisted=(os_name in cluster.blacklisted_os))
3651

    
3652
      variants = set()
3653
      parameters = set()
3654
      api_versions = set()
3655

    
3656
      for idx, osl in enumerate(os_data.values()):
3657
        info.valid = bool(info.valid and osl and osl[0][1])
3658
        if not info.valid:
3659
          break
3660

    
3661
        (node_variants, node_params, node_api) = osl[0][3:6]
3662
        if idx == 0:
3663
          # First entry
3664
          variants.update(node_variants)
3665
          parameters.update(node_params)
3666
          api_versions.update(node_api)
3667
        else:
3668
          # Filter out inconsistent values
3669
          variants.intersection_update(node_variants)
3670
          parameters.intersection_update(node_params)
3671
          api_versions.intersection_update(node_api)
3672

    
3673
      info.variants = list(variants)
3674
      info.parameters = list(parameters)
3675
      info.api_versions = list(api_versions)
3676

    
3677
      data[os_name] = info
3678

    
3679
    # Prepare data in requested order
3680
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3681
            if name in data]
3682

    
3683

    
3684
class LUOsDiagnose(NoHooksLU):
3685
  """Logical unit for OS diagnose/query.
3686

3687
  """
3688
  REQ_BGL = False
3689

    
3690
  @staticmethod
3691
  def _BuildFilter(fields, names):
3692
    """Builds a filter for querying OSes.
3693

3694
    """
3695
    name_filter = qlang.MakeSimpleFilter("name", names)
3696

    
3697
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3698
    # respective field is not requested
3699
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3700
                     for fname in ["hidden", "blacklisted"]
3701
                     if fname not in fields]
3702
    if "valid" not in fields:
3703
      status_filter.append([qlang.OP_TRUE, "valid"])
3704

    
3705
    if status_filter:
3706
      status_filter.insert(0, qlang.OP_AND)
3707
    else:
3708
      status_filter = None
3709

    
3710
    if name_filter and status_filter:
3711
      return [qlang.OP_AND, name_filter, status_filter]
3712
    elif name_filter:
3713
      return name_filter
3714
    else:
3715
      return status_filter
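  # Illustrative sketch (not part of the original code): for a caller that
  # requests only the "name" field and passes no OS names, the status part of
  # the filter built above is roughly
  #   [qlang.OP_AND,
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #    [qlang.OP_TRUE, "valid"]]
  # i.e. hidden, blacklisted and invalid OSes are filtered out unless those
  # fields are requested explicitly.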
3716

    
3717
  def CheckArguments(self):
3718
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3719
                       self.op.output_fields, False)
3720

    
3721
  def ExpandNames(self):
3722
    self.oq.ExpandNames(self)
3723

    
3724
  def Exec(self, feedback_fn):
3725
    return self.oq.OldStyleQuery(self)
3726

    
3727

    
3728
class LUNodeRemove(LogicalUnit):
3729
  """Logical unit for removing a node.
3730

3731
  """
3732
  HPATH = "node-remove"
3733
  HTYPE = constants.HTYPE_NODE
3734

    
3735
  def BuildHooksEnv(self):
3736
    """Build hooks env.
3737

3738
    This doesn't run on the target node in the pre phase as a failed
3739
    node would then be impossible to remove.
3740

3741
    """
3742
    return {
3743
      "OP_TARGET": self.op.node_name,
3744
      "NODE_NAME": self.op.node_name,
3745
      }
3746

    
3747
  def BuildHooksNodes(self):
3748
    """Build hooks nodes.
3749

3750
    """
3751
    all_nodes = self.cfg.GetNodeList()
3752
    try:
3753
      all_nodes.remove(self.op.node_name)
3754
    except ValueError:
3755
      logging.warning("Node '%s', which is about to be removed, was not found"
3756
                      " in the list of all nodes", self.op.node_name)
3757
    return (all_nodes, all_nodes)
3758

    
3759
  def CheckPrereq(self):
3760
    """Check prerequisites.
3761

3762
    This checks:
3763
     - the node exists in the configuration
3764
     - it does not have primary or secondary instances
3765
     - it's not the master
3766

3767
    Any errors are signaled by raising errors.OpPrereqError.
3768

3769
    """
3770
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3771
    node = self.cfg.GetNodeInfo(self.op.node_name)
3772
    assert node is not None
3773

    
3774
    instance_list = self.cfg.GetInstanceList()
3775

    
3776
    masternode = self.cfg.GetMasterNode()
3777
    if node.name == masternode:
3778
      raise errors.OpPrereqError("Node is the master node,"
3779
                                 " you need to failover first.",
3780
                                 errors.ECODE_INVAL)
3781

    
3782
    for instance_name in instance_list:
3783
      instance = self.cfg.GetInstanceInfo(instance_name)
3784
      if node.name in instance.all_nodes:
3785
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3786
                                   " please remove first." % instance_name,
3787
                                   errors.ECODE_INVAL)
3788
    self.op.node_name = node.name
3789
    self.node = node
3790

    
3791
  def Exec(self, feedback_fn):
3792
    """Removes the node from the cluster.
3793

3794
    """
3795
    node = self.node
3796
    logging.info("Stopping the node daemon and removing configs from node %s",
3797
                 node.name)
3798

    
3799
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3800

    
3801
    # Promote nodes to master candidate as needed
3802
    _AdjustCandidatePool(self, exceptions=[node.name])
3803
    self.context.RemoveNode(node.name)
3804

    
3805
    # Run post hooks on the node before it's removed
3806
    _RunPostHook(self, node.name)
3807

    
3808
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3809
    msg = result.fail_msg
3810
    if msg:
3811
      self.LogWarning("Errors encountered on the remote node while leaving"
3812
                      " the cluster: %s", msg)
3813

    
3814
    # Remove node from our /etc/hosts
3815
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3816
      master_node = self.cfg.GetMasterNode()
3817
      result = self.rpc.call_etc_hosts_modify(master_node,
3818
                                              constants.ETC_HOSTS_REMOVE,
3819
                                              node.name, None)
3820
      result.Raise("Can't update hosts file with new host data")
3821
      _RedistributeAncillaryFiles(self)
3822

    
3823

    
3824
class _NodeQuery(_QueryBase):
3825
  FIELDS = query.NODE_FIELDS
3826

    
3827
  def ExpandNames(self, lu):
3828
    lu.needed_locks = {}
3829
    lu.share_locks[locking.LEVEL_NODE] = 1
3830

    
3831
    if self.names:
3832
      self.wanted = _GetWantedNodes(lu, self.names)
3833
    else:
3834
      self.wanted = locking.ALL_SET
3835

    
3836
    self.do_locking = (self.use_locking and
3837
                       query.NQ_LIVE in self.requested_data)
3838

    
3839
    if self.do_locking:
3840
      # if we don't request only static fields, we need to lock the nodes
3841
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3842

    
3843
  def DeclareLocks(self, lu, level):
3844
    pass
3845

    
3846
  def _GetQueryData(self, lu):
3847
    """Computes the list of nodes and their attributes.
3848

3849
    """
3850
    all_info = lu.cfg.GetAllNodesInfo()
3851

    
3852
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3853

    
3854
    # Gather data as requested
3855
    if query.NQ_LIVE in self.requested_data:
3856
      # filter out non-vm_capable nodes
3857
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3858

    
3859
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3860
                                        lu.cfg.GetHypervisorType())
3861
      live_data = dict((name, nresult.payload)
3862
                       for (name, nresult) in node_data.items()
3863
                       if not nresult.fail_msg and nresult.payload)
3864
    else:
3865
      live_data = None
3866

    
3867
    if query.NQ_INST in self.requested_data:
3868
      node_to_primary = dict([(name, set()) for name in nodenames])
3869
      node_to_secondary = dict([(name, set()) for name in nodenames])
3870

    
3871
      inst_data = lu.cfg.GetAllInstancesInfo()
3872

    
3873
      for inst in inst_data.values():
3874
        if inst.primary_node in node_to_primary:
3875
          node_to_primary[inst.primary_node].add(inst.name)
3876
        for secnode in inst.secondary_nodes:
3877
          if secnode in node_to_secondary:
3878
            node_to_secondary[secnode].add(inst.name)
3879
    else:
3880
      node_to_primary = None
3881
      node_to_secondary = None
3882

    
3883
    if query.NQ_OOB in self.requested_data:
3884
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3885
                         for name, node in all_info.iteritems())
3886
    else:
3887
      oob_support = None
3888

    
3889
    if query.NQ_GROUP in self.requested_data:
3890
      groups = lu.cfg.GetAllNodeGroupsInfo()
3891
    else:
3892
      groups = {}
3893

    
3894
    return query.NodeQueryData([all_info[name] for name in nodenames],
3895
                               live_data, lu.cfg.GetMasterNode(),
3896
                               node_to_primary, node_to_secondary, groups,
3897
                               oob_support, lu.cfg.GetClusterInfo())
3898

    
3899

    
3900
class LUNodeQuery(NoHooksLU):
3901
  """Logical unit for querying nodes.
3902

3903
  """
3904
  # pylint: disable-msg=W0142
3905
  REQ_BGL = False
3906

    
3907
  def CheckArguments(self):
3908
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3909
                         self.op.output_fields, self.op.use_locking)
3910

    
3911
  def ExpandNames(self):
3912
    self.nq.ExpandNames(self)
3913

    
3914
  def Exec(self, feedback_fn):
3915
    return self.nq.OldStyleQuery(self)
3916

    
3917

    
3918
class LUNodeQueryvols(NoHooksLU):
3919
  """Logical unit for getting volumes on node(s).
3920

3921
  """
3922
  REQ_BGL = False
3923
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3924
  _FIELDS_STATIC = utils.FieldSet("node")
3925

    
3926
  def CheckArguments(self):
3927
    _CheckOutputFields(static=self._FIELDS_STATIC,
3928
                       dynamic=self._FIELDS_DYNAMIC,
3929
                       selected=self.op.output_fields)
3930

    
3931
  def ExpandNames(self):
3932
    self.needed_locks = {}
3933
    self.share_locks[locking.LEVEL_NODE] = 1
3934
    if not self.op.nodes:
3935
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3936
    else:
3937
      self.needed_locks[locking.LEVEL_NODE] = \
3938
        _GetWantedNodes(self, self.op.nodes)
3939

    
3940
  def Exec(self, feedback_fn):
3941
    """Computes the list of nodes and their attributes.
3942

3943
    """
3944
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3945
    volumes = self.rpc.call_node_volumes(nodenames)
3946

    
3947
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3948
             in self.cfg.GetInstanceList()]
3949

    
3950
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3951

    
3952
    output = []
3953
    for node in nodenames:
3954
      nresult = volumes[node]
3955
      if nresult.offline:
3956
        continue
3957
      msg = nresult.fail_msg
3958
      if msg:
3959
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3960
        continue
3961

    
3962
      node_vols = nresult.payload[:]
3963
      node_vols.sort(key=lambda vol: vol['dev'])
3964

    
3965
      for vol in node_vols:
3966
        node_output = []
3967
        for field in self.op.output_fields:
3968
          if field == "node":
3969
            val = node
3970
          elif field == "phys":
3971
            val = vol['dev']
3972
          elif field == "vg":
3973
            val = vol['vg']
3974
          elif field == "name":
3975
            val = vol['name']
3976
          elif field == "size":
3977
            val = int(float(vol['size']))
3978
          elif field == "instance":
3979
            for inst in ilist:
3980
              if node not in lv_by_node[inst]:
3981
                continue
3982
              if vol['name'] in lv_by_node[inst][node]:
3983
                val = inst.name
3984
                break
3985
            else:
3986
              val = '-'
3987
          else:
3988
            raise errors.ParameterError(field)
3989
          node_output.append(str(val))
3990

    
3991
        output.append(node_output)
3992

    
3993
    return output
3994

    
3995

    
3996
class LUNodeQueryStorage(NoHooksLU):
3997
  """Logical unit for getting information on storage units on node(s).
3998

3999
  """
4000
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4001
  REQ_BGL = False
4002

    
4003
  def CheckArguments(self):
4004
    _CheckOutputFields(static=self._FIELDS_STATIC,
4005
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4006
                       selected=self.op.output_fields)
4007

    
4008
  def ExpandNames(self):
4009
    self.needed_locks = {}
4010
    self.share_locks[locking.LEVEL_NODE] = 1
4011

    
4012
    if self.op.nodes:
4013
      self.needed_locks[locking.LEVEL_NODE] = \
4014
        _GetWantedNodes(self, self.op.nodes)
4015
    else:
4016
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4017

    
4018
  def Exec(self, feedback_fn):
4019
    """Computes the list of nodes and their attributes.
4020

4021
    """
4022
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4023

    
4024
    # Always get name to sort by
4025
    if constants.SF_NAME in self.op.output_fields:
4026
      fields = self.op.output_fields[:]
4027
    else:
4028
      fields = [constants.SF_NAME] + self.op.output_fields
4029

    
4030
    # Never ask for node or type as it's only known to the LU
4031
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4032
      while extra in fields:
4033
        fields.remove(extra)
4034

    
4035
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4036
    name_idx = field_idx[constants.SF_NAME]
4037

    
4038
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4039
    data = self.rpc.call_storage_list(self.nodes,
4040
                                      self.op.storage_type, st_args,
4041
                                      self.op.name, fields)
4042

    
4043
    result = []
4044

    
4045
    for node in utils.NiceSort(self.nodes):
4046
      nresult = data[node]
4047
      if nresult.offline:
4048
        continue
4049

    
4050
      msg = nresult.fail_msg
4051
      if msg:
4052
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4053
        continue
4054

    
4055
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4056

    
4057
      for name in utils.NiceSort(rows.keys()):
4058
        row = rows[name]
4059

    
4060
        out = []
4061

    
4062
        for field in self.op.output_fields:
4063
          if field == constants.SF_NODE:
4064
            val = node
4065
          elif field == constants.SF_TYPE:
4066
            val = self.op.storage_type
4067
          elif field in field_idx:
4068
            val = row[field_idx[field]]
4069
          else:
4070
            raise errors.ParameterError(field)
4071

    
4072
          out.append(val)
4073

    
4074
        result.append(out)
4075

    
4076
    return result
4077

    
4078

    
4079
class _InstanceQuery(_QueryBase):
4080
  FIELDS = query.INSTANCE_FIELDS
4081

    
4082
  def ExpandNames(self, lu):
4083
    lu.needed_locks = {}
4084
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4085
    lu.share_locks[locking.LEVEL_NODE] = 1
4086

    
4087
    if self.names:
4088
      self.wanted = _GetWantedInstances(lu, self.names)
4089
    else:
4090
      self.wanted = locking.ALL_SET
4091

    
4092
    self.do_locking = (self.use_locking and
4093
                       query.IQ_LIVE in self.requested_data)
4094
    if self.do_locking:
4095
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4096
      lu.needed_locks[locking.LEVEL_NODE] = []
4097
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4098

    
4099
  def DeclareLocks(self, lu, level):
4100
    if level == locking.LEVEL_NODE and self.do_locking:
4101
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4102

    
4103
  def _GetQueryData(self, lu):
4104
    """Computes the list of instances and their attributes.
4105

4106
    """
4107
    cluster = lu.cfg.GetClusterInfo()
4108
    all_info = lu.cfg.GetAllInstancesInfo()
4109

    
4110
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4111

    
4112
    instance_list = [all_info[name] for name in instance_names]
4113
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4114
                                        for inst in instance_list)))
4115
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4116
    bad_nodes = []
4117
    offline_nodes = []
4118
    wrongnode_inst = set()
4119

    
4120
    # Gather data as requested
4121
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4122
      live_data = {}
4123
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4124
      for name in nodes:
4125
        result = node_data[name]
4126
        if result.offline:
4127
          # offline nodes will be in both lists
4128
          assert result.fail_msg
4129
          offline_nodes.append(name)
4130
        if result.fail_msg:
4131
          bad_nodes.append(name)
4132
        elif result.payload:
4133
          for inst in result.payload:
4134
            if inst in all_info:
4135
              if all_info[inst].primary_node == name:
4136
                live_data.update(result.payload)
4137
              else:
4138
                wrongnode_inst.add(inst)
4139
            else:
4140
              # orphan instance; we don't list it here as we don't
4141
              # handle this case yet in the output of instance listing
4142
              logging.warning("Orphan instance '%s' found on node %s",
4143
                              inst, name)
4144
        # else no instance is alive
4145
    else:
4146
      live_data = {}
4147

    
4148
    if query.IQ_DISKUSAGE in self.requested_data:
4149
      disk_usage = dict((inst.name,
4150
                         _ComputeDiskSize(inst.disk_template,
4151
                                          [{constants.IDISK_SIZE: disk.size}
4152
                                           for disk in inst.disks]))
4153
                        for inst in instance_list)
4154
    else:
4155
      disk_usage = None
4156

    
4157
    if query.IQ_CONSOLE in self.requested_data:
4158
      consinfo = {}
4159
      for inst in instance_list:
4160
        if inst.name in live_data:
4161
          # Instance is running
4162
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4163
        else:
4164
          consinfo[inst.name] = None
4165
      assert set(consinfo.keys()) == set(instance_names)
4166
    else:
4167
      consinfo = None
4168

    
4169
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4170
                                   disk_usage, offline_nodes, bad_nodes,
4171
                                   live_data, wrongnode_inst, consinfo)
4172

    
4173

    
4174
class LUQuery(NoHooksLU):
4175
  """Query for resources/items of a certain kind.
4176

4177
  """
4178
  # pylint: disable-msg=W0142
4179
  REQ_BGL = False
4180

    
4181
  def CheckArguments(self):
4182
    qcls = _GetQueryImplementation(self.op.what)
4183

    
4184
    self.impl = qcls(self.op.filter, self.op.fields, False)
4185

    
4186
  def ExpandNames(self):
4187
    self.impl.ExpandNames(self)
4188

    
4189
  def DeclareLocks(self, level):
4190
    self.impl.DeclareLocks(self, level)
4191

    
4192
  def Exec(self, feedback_fn):
4193
    return self.impl.NewStyleQuery(self)
4194

    
4195

    
4196
class LUQueryFields(NoHooksLU):
4197
  """Query for resources/items of a certain kind.
4198

4199
  """
4200
  # pylint: disable-msg=W0142
4201
  REQ_BGL = False
4202

    
4203
  def CheckArguments(self):
4204
    self.qcls = _GetQueryImplementation(self.op.what)
4205

    
4206
  def ExpandNames(self):
4207
    self.needed_locks = {}
4208

    
4209
  def Exec(self, feedback_fn):
4210
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4211

    
4212

    
4213
class LUNodeModifyStorage(NoHooksLU):
4214
  """Logical unit for modifying a storage volume on a node.
4215

4216
  """
4217
  REQ_BGL = False
4218

    
4219
  def CheckArguments(self):
4220
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4221

    
4222
    storage_type = self.op.storage_type
4223

    
4224
    try:
4225
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4226
    except KeyError:
4227
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4228
                                 " modified" % storage_type,
4229
                                 errors.ECODE_INVAL)
4230

    
4231
    diff = set(self.op.changes.keys()) - modifiable
4232
    if diff:
4233
      raise errors.OpPrereqError("The following fields can not be modified for"
4234
                                 " storage units of type '%s': %r" %
4235
                                 (storage_type, list(diff)),
4236
                                 errors.ECODE_INVAL)
4237

    
4238
  def ExpandNames(self):
4239
    self.needed_locks = {
4240
      locking.LEVEL_NODE: self.op.node_name,
4241
      }
4242

    
4243
  def Exec(self, feedback_fn):
4244
    """Computes the list of nodes and their attributes.
4245

4246
    """
4247
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4248
    result = self.rpc.call_storage_modify(self.op.node_name,
4249
                                          self.op.storage_type, st_args,
4250
                                          self.op.name, self.op.changes)
4251
    result.Raise("Failed to modify storage unit '%s' on %s" %
4252
                 (self.op.name, self.op.node_name))
4253

    
4254

    
4255
class LUNodeAdd(LogicalUnit):
4256
  """Logical unit for adding node to the cluster.
4257

4258
  """
4259
  HPATH = "node-add"
4260
  HTYPE = constants.HTYPE_NODE
4261
  _NFLAGS = ["master_capable", "vm_capable"]
4262

    
4263
  def CheckArguments(self):
4264
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4265
    # validate/normalize the node name
4266
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4267
                                         family=self.primary_ip_family)
4268
    self.op.node_name = self.hostname.name
4269
    if self.op.readd and self.op.group:
4270
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4271
                                 " being readded", errors.ECODE_INVAL)
4272

    
4273
  def BuildHooksEnv(self):
4274
    """Build hooks env.
4275

4276
    This will run on all nodes before, and on all nodes + the new node after.
4277

4278
    """
4279
    return {
4280
      "OP_TARGET": self.op.node_name,
4281
      "NODE_NAME": self.op.node_name,
4282
      "NODE_PIP": self.op.primary_ip,
4283
      "NODE_SIP": self.op.secondary_ip,
4284
      "MASTER_CAPABLE": str(self.op.master_capable),
4285
      "VM_CAPABLE": str(self.op.vm_capable),
4286
      }
4287

    
4288
  def BuildHooksNodes(self):
4289
    """Build hooks nodes.
4290

4291
    """
4292
    # Exclude added node
4293
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4294
    post_nodes = pre_nodes + [self.op.node_name, ]
4295

    
4296
    return (pre_nodes, post_nodes)
4297

    
4298
  def CheckPrereq(self):
4299
    """Check prerequisites.
4300

4301
    This checks:
4302
     - the new node is not already in the config
4303
     - it is resolvable
4304
     - its parameters (single/dual homed) matches the cluster
4305

4306
    Any errors are signaled by raising errors.OpPrereqError.
4307

4308
    """
4309
    cfg = self.cfg
4310
    hostname = self.hostname
4311
    node = hostname.name
4312
    primary_ip = self.op.primary_ip = hostname.ip
4313
    if self.op.secondary_ip is None:
4314
      if self.primary_ip_family == netutils.IP6Address.family:
4315
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4316
                                   " IPv4 address must be given as secondary",
4317
                                   errors.ECODE_INVAL)
4318
      self.op.secondary_ip = primary_ip
4319

    
4320
    secondary_ip = self.op.secondary_ip
4321
    if not netutils.IP4Address.IsValid(secondary_ip):
4322
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4323
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4324

    
4325
    node_list = cfg.GetNodeList()
4326
    if not self.op.readd and node in node_list:
4327
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4328
                                 node, errors.ECODE_EXISTS)
4329
    elif self.op.readd and node not in node_list:
4330
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4331
                                 errors.ECODE_NOENT)
4332

    
4333
    self.changed_primary_ip = False
4334

    
4335
    for existing_node_name in node_list:
4336
      existing_node = cfg.GetNodeInfo(existing_node_name)
4337

    
4338
      if self.op.readd and node == existing_node_name:
4339
        if existing_node.secondary_ip != secondary_ip:
4340
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4341
                                     " address configuration as before",
4342
                                     errors.ECODE_INVAL)
4343
        if existing_node.primary_ip != primary_ip:
4344
          self.changed_primary_ip = True
4345

    
4346
        continue
4347

    
4348
      if (existing_node.primary_ip == primary_ip or
4349
          existing_node.secondary_ip == primary_ip or
4350
          existing_node.primary_ip == secondary_ip or
4351
          existing_node.secondary_ip == secondary_ip):
4352
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4353
                                   " existing node %s" % existing_node.name,
4354
                                   errors.ECODE_NOTUNIQUE)
4355

    
4356
    # After this 'if' block, None is no longer a valid value for the
4357
    # _capable op attributes
4358
    if self.op.readd:
4359
      old_node = self.cfg.GetNodeInfo(node)
4360
      assert old_node is not None, "Can't retrieve locked node %s" % node
4361
      for attr in self._NFLAGS:
4362
        if getattr(self.op, attr) is None:
4363
          setattr(self.op, attr, getattr(old_node, attr))
4364
    else:
4365
      for attr in self._NFLAGS:
4366
        if getattr(self.op, attr) is None:
4367
          setattr(self.op, attr, True)
4368

    
4369
    if self.op.readd and not self.op.vm_capable:
4370
      pri, sec = cfg.GetNodeInstances(node)
4371
      if pri or sec:
4372
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4373
                                   " flag set to false, but it already holds"
4374
                                   " instances" % node,
4375
                                   errors.ECODE_STATE)
4376

    
4377
    # check that the type of the node (single versus dual homed) is the
4378
    # same as for the master
4379
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4380
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4381
    newbie_singlehomed = secondary_ip == primary_ip
4382
    if master_singlehomed != newbie_singlehomed:
4383
      if master_singlehomed:
4384
        raise errors.OpPrereqError("The master has no secondary ip but the"
4385
                                   " new node has one",
4386
                                   errors.ECODE_INVAL)
4387
      else:
4388
        raise errors.OpPrereqError("The master has a secondary ip but the"
4389
                                   " new node doesn't have one",
4390
                                   errors.ECODE_INVAL)
4391

    
4392
    # checks reachability
4393
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4394
      raise errors.OpPrereqError("Node not reachable by ping",
4395
                                 errors.ECODE_ENVIRON)
4396

    
4397
    if not newbie_singlehomed:
4398
      # check reachability from my secondary ip to newbie's secondary ip
4399
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4400
                           source=myself.secondary_ip):
4401
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4402
                                   " based ping to node daemon port",
4403
                                   errors.ECODE_ENVIRON)
4404

    
4405
    if self.op.readd:
4406
      exceptions = [node]
4407
    else:
4408
      exceptions = []
4409

    
4410
    if self.op.master_capable:
4411
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4412
    else:
4413
      self.master_candidate = False
4414

    
4415
    if self.op.readd:
4416
      self.new_node = old_node
4417
    else:
4418
      node_group = cfg.LookupNodeGroup(self.op.group)
4419
      self.new_node = objects.Node(name=node,
4420
                                   primary_ip=primary_ip,
4421
                                   secondary_ip=secondary_ip,
4422
                                   master_candidate=self.master_candidate,
4423
                                   offline=False, drained=False,
4424
                                   group=node_group)
4425

    
4426
    if self.op.ndparams:
4427
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4428

    
4429
  def Exec(self, feedback_fn):
4430
    """Adds the new node to the cluster.
4431

4432
    """
4433
    new_node = self.new_node
4434
    node = new_node.name
4435

    
4436
    # We are adding a new node, so we assume it is powered
4437
    new_node.powered = True
4438

    
4439
    # for re-adds, reset the offline/drained/master-candidate flags;
4440
    # we need to reset here, otherwise offline would prevent RPC calls
4441
    # later in the procedure; this also means that if the re-add
4442
    # fails, we are left with a non-offlined, broken node
4443
    if self.op.readd:
4444
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4445
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4446
      # if we demote the node, we do cleanup later in the procedure
4447
      new_node.master_candidate = self.master_candidate
4448
      if self.changed_primary_ip:
4449
        new_node.primary_ip = self.op.primary_ip
4450

    
4451
    # copy the master/vm_capable flags
4452
    for attr in self._NFLAGS:
4453
      setattr(new_node, attr, getattr(self.op, attr))
4454

    
4455
    # notify the user about any possible mc promotion
4456
    if new_node.master_candidate:
4457
      self.LogInfo("Node will be a master candidate")
4458

    
4459
    if self.op.ndparams:
4460
      new_node.ndparams = self.op.ndparams
4461
    else:
4462
      new_node.ndparams = {}
4463

    
4464
    # check connectivity
4465
    result = self.rpc.call_version([node])[node]
4466
    result.Raise("Can't get version information from node %s" % node)
4467
    if constants.PROTOCOL_VERSION == result.payload:
4468
      logging.info("Communication to node %s fine, sw version %s match",
4469
                   node, result.payload)
4470
    else:
4471
      raise errors.OpExecError("Version mismatch master version %s,"
4472
                               " node version %s" %
4473
                               (constants.PROTOCOL_VERSION, result.payload))
4474

    
4475
    # Add node to our /etc/hosts, and add key to known_hosts
4476
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4477
      master_node = self.cfg.GetMasterNode()
4478
      result = self.rpc.call_etc_hosts_modify(master_node,
4479
                                              constants.ETC_HOSTS_ADD,
4480
                                              self.hostname.name,
4481
                                              self.hostname.ip)
4482
      result.Raise("Can't update hosts file with new host data")
4483

    
4484
    if new_node.secondary_ip != new_node.primary_ip:
4485
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4486
                               False)
4487

    
4488
    node_verify_list = [self.cfg.GetMasterNode()]
4489
    node_verify_param = {
4490
      constants.NV_NODELIST: [node],
4491
      # TODO: do a node-net-test as well?
4492
    }
4493

    
4494
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4495
                                       self.cfg.GetClusterName())
4496
    for verifier in node_verify_list:
4497
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4498
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4499
      if nl_payload:
4500
        for failed in nl_payload:
4501
          feedback_fn("ssh/hostname verification failed"
4502
                      " (checking from %s): %s" %
4503
                      (verifier, nl_payload[failed]))
4504
        raise errors.OpExecError("ssh/hostname verification failed.")
4505

    
4506
    if self.op.readd:
4507
      _RedistributeAncillaryFiles(self)
4508
      self.context.ReaddNode(new_node)
4509
      # make sure we redistribute the config
4510
      self.cfg.Update(new_node, feedback_fn)
4511
      # and make sure the new node will not have old files around
4512
      if not new_node.master_candidate:
4513
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4514
        msg = result.fail_msg
4515
        if msg:
4516
          self.LogWarning("Node failed to demote itself from master"
4517
                          " candidate status: %s" % msg)
4518
    else:
4519
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4520
                                  additional_vm=self.op.vm_capable)
4521
      self.context.AddNode(new_node, self.proc.GetECId())
4522

    
4523

    
4524
class LUNodeSetParams(LogicalUnit):
4525
  """Modifies the parameters of a node.
4526

4527
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4528
      to the node role (as _ROLE_*)
4529
  @cvar _R2F: a dictionary from node role to tuples of flags
4530
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4531

4532
  """
4533
  HPATH = "node-modify"
4534
  HTYPE = constants.HTYPE_NODE
4535
  REQ_BGL = False
4536
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4537
  _F2R = {
4538
    (True, False, False): _ROLE_CANDIDATE,
4539
    (False, True, False): _ROLE_DRAINED,
4540
    (False, False, True): _ROLE_OFFLINE,
4541
    (False, False, False): _ROLE_REGULAR,
4542
    }
4543
  _R2F = dict((v, k) for k, v in _F2R.items())
4544
  _FLAGS = ["master_candidate", "drained", "offline"]
4545

    
4546
  def CheckArguments(self):
4547
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4548
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4549
                self.op.master_capable, self.op.vm_capable,
4550
                self.op.secondary_ip, self.op.ndparams]
4551
    if all_mods.count(None) == len(all_mods):
4552
      raise errors.OpPrereqError("Please pass at least one modification",
4553
                                 errors.ECODE_INVAL)
4554
    if all_mods.count(True) > 1:
4555
      raise errors.OpPrereqError("Can't set the node into more than one"
4556
                                 " state at the same time",
4557
                                 errors.ECODE_INVAL)
4558

    
4559
    # Boolean value that tells us whether we might be demoting from MC
4560
    self.might_demote = (self.op.master_candidate == False or
4561
                         self.op.offline == True or
4562
                         self.op.drained == True or
4563
                         self.op.master_capable == False)
4564

    
4565
    if self.op.secondary_ip:
4566
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4567
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4568
                                   " address" % self.op.secondary_ip,
4569
                                   errors.ECODE_INVAL)
4570

    
4571
    self.lock_all = self.op.auto_promote and self.might_demote
4572
    self.lock_instances = self.op.secondary_ip is not None
4573

    
4574
  def ExpandNames(self):
4575
    if self.lock_all:
4576
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4577
    else:
4578
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4579

    
4580
    if self.lock_instances:
4581
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4582

    
4583
  def DeclareLocks(self, level):
4584
    # If we have locked all instances, before waiting to lock nodes, release
4585
    # all the ones living on nodes unrelated to the current operation.
4586
    if level == locking.LEVEL_NODE and self.lock_instances:
4587
      instances_release = []
4588
      instances_keep = []
4589
      self.affected_instances = []
4590
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4591
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4592
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4593
          i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4594
          if i_mirrored and self.op.node_name in instance.all_nodes:
4595
            instances_keep.append(instance_name)
4596
            self.affected_instances.append(instance)
4597
          else:
4598
            instances_release.append(instance_name)
4599
        if instances_release:
4600
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4601
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4602

    
4603
  def BuildHooksEnv(self):
4604
    """Build hooks env.
4605

4606
    This runs on the master node.
4607

4608
    """
4609
    return {
4610
      "OP_TARGET": self.op.node_name,
4611
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4612
      "OFFLINE": str(self.op.offline),
4613
      "DRAINED": str(self.op.drained),
4614
      "MASTER_CAPABLE": str(self.op.master_capable),
4615
      "VM_CAPABLE": str(self.op.vm_capable),
4616
      }
4617

    
4618
  def BuildHooksNodes(self):
4619
    """Build hooks nodes.
4620

4621
    """
4622
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
4623
    return (nl, nl)
4624

    
4625
  def CheckPrereq(self):
4626
    """Check prerequisites.
4627

4628
    This only checks the instance list against the existing names.
4629

4630
    """
4631
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4632

    
4633
    if (self.op.master_candidate is not None or
4634
        self.op.drained is not None or
4635
        self.op.offline is not None):
4636
      # we can't change the master's node flags
4637
      if self.op.node_name == self.cfg.GetMasterNode():
4638
        raise errors.OpPrereqError("The master role can be changed"
4639
                                   " only via master-failover",
4640
                                   errors.ECODE_INVAL)
4641

    
4642
    if self.op.master_candidate and not node.master_capable:
4643
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4644
                                 " it a master candidate" % node.name,
4645
                                 errors.ECODE_STATE)
4646

    
4647
    if self.op.vm_capable == False:
4648
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4649
      if ipri or isec:
4650
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4651
                                   " the vm_capable flag" % node.name,
4652
                                   errors.ECODE_STATE)
4653

    
4654
    if node.master_candidate and self.might_demote and not self.lock_all:
4655
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
4656
      # check if after removing the current node, we're missing master
4657
      # candidates
4658
      (mc_remaining, mc_should, _) = \
4659
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4660
      if mc_remaining < mc_should:
4661
        raise errors.OpPrereqError("Not enough master candidates, please"
4662
                                   " pass auto promote option to allow"
4663
                                   " promotion", errors.ECODE_STATE)
4664

    
4665
    self.old_flags = old_flags = (node.master_candidate,
4666
                                  node.drained, node.offline)
4667
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4668
    self.old_role = old_role = self._F2R[old_flags]
4669

    
4670
    # Check for ineffective changes
4671
    for attr in self._FLAGS:
4672
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4673
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4674
        setattr(self.op, attr, None)
4675

    
4676
    # Past this point, any flag change to False means a transition
4677
    # away from the respective state, as only real changes are kept
4678

    
4679
    # TODO: We might query the real power state if it supports OOB
4680
    if _SupportsOob(self.cfg, node):
4681
      if self.op.offline is False and not (node.powered or
4682
                                           self.op.powered == True):
4683
        raise errors.OpPrereqError(("Please power on node %s first before you"
4684
                                    " can reset offline state") %
4685
                                   self.op.node_name)
4686
    elif self.op.powered is not None:
4687
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4688
                                  " which does not support out-of-band"
4689
                                  " handling") % self.op.node_name)
4690

    
4691
    # If we're being deofflined/drained, we'll MC ourself if needed
4692
    if (self.op.drained == False or self.op.offline == False or
4693
        (self.op.master_capable and not node.master_capable)):
4694
      if _DecideSelfPromotion(self):
4695
        self.op.master_candidate = True
4696
        self.LogInfo("Auto-promoting node to master candidate")
4697

    
4698
    # If we're no longer master capable, we'll demote ourselves from MC
4699
    if self.op.master_capable == False and node.master_candidate:
4700
      self.LogInfo("Demoting from master candidate")
4701
      self.op.master_candidate = False
4702

    
4703
    # Compute new role
4704
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4705
    if self.op.master_candidate:
4706
      new_role = self._ROLE_CANDIDATE
4707
    elif self.op.drained:
4708
      new_role = self._ROLE_DRAINED
4709
    elif self.op.offline:
4710
      new_role = self._ROLE_OFFLINE
4711
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4712
      # False is still in new flags, which means we're un-setting (the
4713
      # only) True flag
4714
      new_role = self._ROLE_REGULAR
4715
    else: # no new flags, nothing, keep old role
4716
      new_role = old_role
4717

    
4718
    self.new_role = new_role
4719

    
4720
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4721
      # Trying to transition out of offline status
4722
      result = self.rpc.call_version([node.name])[node.name]
4723
      if result.fail_msg:
4724
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4725
                                   " to report its version: %s" %
4726
                                   (node.name, result.fail_msg),
4727
                                   errors.ECODE_STATE)
4728
      else:
4729
        self.LogWarning("Transitioning node from offline to online state"
4730
                        " without using re-add. Please make sure the node"
4731
                        " is healthy!")
4732

    
4733
    if self.op.secondary_ip:
4734
      # Ok even without locking, because this can't be changed by any LU
4735
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4736
      master_singlehomed = master.secondary_ip == master.primary_ip
4737
      if master_singlehomed and self.op.secondary_ip:
4738
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4739
                                   " homed cluster", errors.ECODE_INVAL)
4740

    
4741
      if node.offline:
4742
        if self.affected_instances:
4743
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4744
                                     " node has instances (%s) configured"
4745
                                     " to use it" % self.affected_instances)
4746
      else:
4747
        # On online nodes, check that no instances are running, and that
4748
        # the node has the new ip and we can reach it.
4749
        for instance in self.affected_instances:
4750
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4751

    
4752
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4753
        if master.name != node.name:
4754
          # check reachability from master secondary ip to new secondary ip
4755
          if not netutils.TcpPing(self.op.secondary_ip,
4756
                                  constants.DEFAULT_NODED_PORT,
4757
                                  source=master.secondary_ip):
4758
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4759
                                       " based ping to node daemon port",
4760
                                       errors.ECODE_ENVIRON)
4761

    
4762
    if self.op.ndparams:
4763
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4764
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4765
      self.new_ndparams = new_ndparams
4766

    
4767
  def Exec(self, feedback_fn):
4768
    """Modifies a node.
4769

4770
    """
4771
    node = self.node
4772
    old_role = self.old_role
4773
    new_role = self.new_role
4774

    
4775
    result = []
4776

    
4777
    if self.op.ndparams:
4778
      node.ndparams = self.new_ndparams
4779

    
4780
    if self.op.powered is not None:
4781
      node.powered = self.op.powered
4782

    
4783
    for attr in ["master_capable", "vm_capable"]:
4784
      val = getattr(self.op, attr)
4785
      if val is not None:
4786
        setattr(node, attr, val)
4787
        result.append((attr, str(val)))
4788

    
4789
    if new_role != old_role:
4790
      # Tell the node to demote itself, if no longer MC and not offline
4791
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4792
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4793
        if msg:
4794
          self.LogWarning("Node failed to demote itself: %s", msg)
4795

    
4796
      new_flags = self._R2F[new_role]
4797
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4798
        if of != nf:
4799
          result.append((desc, str(nf)))
4800
      (node.master_candidate, node.drained, node.offline) = new_flags
4801

    
4802
      # we locked all nodes, we adjust the CP before updating this node
4803
      if self.lock_all:
4804
        _AdjustCandidatePool(self, [node.name])
4805

    
4806
    if self.op.secondary_ip:
4807
      node.secondary_ip = self.op.secondary_ip
4808
      result.append(("secondary_ip", self.op.secondary_ip))
4809

    
4810
    # this will trigger configuration file update, if needed
4811
    self.cfg.Update(node, feedback_fn)
4812

    
4813
    # this will trigger job queue propagation or cleanup if the mc
4814
    # flag changed
4815
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4816
      self.context.ReaddNode(node)
4817

    
4818
    return result
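

# A minimal, purely illustrative sketch (not used by the LU) of how the
# _F2R/_R2F tables in LUNodeSetParams above are meant to be read: a
# (master_candidate, drained, offline) flag tuple maps to exactly one role, and
# the inverted table maps the role back to its flag tuple. The role names below
# are hypothetical stand-ins for the _ROLE_* constants.
def _ExampleNodeRoleRoundTrip(master_candidate, drained, offline):
  """Map a flag tuple to a role name and check the inverse mapping.

  """
  f2r = {
    (True, False, False): "candidate",
    (False, True, False): "drained",
    (False, False, True): "offline",
    (False, False, False): "regular",
    }
  r2f = dict((v, k) for k, v in f2r.items())
  role = f2r[(master_candidate, drained, offline)]
  assert r2f[role] == (master_candidate, drained, offline)
  return role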


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
4854
  """Query cluster configuration.
4855

4856
  """
4857
  REQ_BGL = False
4858

    
4859
  def ExpandNames(self):
4860
    self.needed_locks = {}
4861

    
4862
  def Exec(self, feedback_fn):
4863
    """Return cluster config.
4864

4865
    """
4866
    cluster = self.cfg.GetClusterInfo()
4867
    os_hvp = {}
4868

    
4869
    # Filter just for enabled hypervisors
4870
    for os_name, hv_dict in cluster.os_hvp.items():
4871
      os_hvp[os_name] = {}
4872
      for hv_name, hv_params in hv_dict.items():
4873
        if hv_name in cluster.enabled_hypervisors:
4874
          os_hvp[os_name][hv_name] = hv_params
4875

    
4876
    # Convert ip_family to ip_version
4877
    primary_ip_version = constants.IP4_VERSION
4878
    if cluster.primary_ip_family == netutils.IP6Address.family:
4879
      primary_ip_version = constants.IP6_VERSION
4880

    
4881
    result = {
4882
      "software_version": constants.RELEASE_VERSION,
4883
      "protocol_version": constants.PROTOCOL_VERSION,
4884
      "config_version": constants.CONFIG_VERSION,
4885
      "os_api_version": max(constants.OS_API_VERSIONS),
4886
      "export_version": constants.EXPORT_VERSION,
4887
      "architecture": (platform.architecture()[0], platform.machine()),
4888
      "name": cluster.cluster_name,
4889
      "master": cluster.master_node,
4890
      "default_hypervisor": cluster.enabled_hypervisors[0],
4891
      "enabled_hypervisors": cluster.enabled_hypervisors,
4892
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4893
                        for hypervisor_name in cluster.enabled_hypervisors]),
4894
      "os_hvp": os_hvp,
4895
      "beparams": cluster.beparams,
4896
      "osparams": cluster.osparams,
4897
      "nicparams": cluster.nicparams,
4898
      "ndparams": cluster.ndparams,
4899
      "candidate_pool_size": cluster.candidate_pool_size,
4900
      "master_netdev": cluster.master_netdev,
4901
      "volume_group_name": cluster.volume_group_name,
4902
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4903
      "file_storage_dir": cluster.file_storage_dir,
4904
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
4905
      "maintain_node_health": cluster.maintain_node_health,
4906
      "ctime": cluster.ctime,
4907
      "mtime": cluster.mtime,
4908
      "uuid": cluster.uuid,
4909
      "tags": list(cluster.GetTags()),
4910
      "uid_pool": cluster.uid_pool,
4911
      "default_iallocator": cluster.default_iallocator,
4912
      "reserved_lvs": cluster.reserved_lvs,
4913
      "primary_ip_version": primary_ip_version,
4914
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4915
      "hidden_os": cluster.hidden_os,
4916
      "blacklisted_os": cluster.blacklisted_os,
4917
      }
4918

    
4919
    return result
4920

    
4921

    
4922
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return the values of the requested configuration fields.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


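# A minimal, purely illustrative sketch (not part of the LU) of an equivalent
# way to express the field dispatch in LUClusterConfigQuery.Exec above: a
# name-to-callable map keeps the per-field logic in one table and still raises
# errors.ParameterError for unknown fields.
def _ExampleFieldDispatch(fields, getters):
  """Return the value of each field in C{fields} using the C{getters} map.

  """
  values = []
  for field in fields:
    try:
      fn = getters[field]
    except KeyError:
      raise errors.ParameterError(field)
    values.append(fn())
  return values

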
class LUInstanceActivateDisks(NoHooksLU):
4962
  """Bring up an instance's disks.
4963

4964
  """
4965
  REQ_BGL = False
4966

    
4967
  def ExpandNames(self):
4968
    self._ExpandAndLockInstance()
4969
    self.needed_locks[locking.LEVEL_NODE] = []
4970
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4971

    
4972
  def DeclareLocks(self, level):
4973
    if level == locking.LEVEL_NODE:
4974
      self._LockInstancesNodes()
4975

    
4976
  def CheckPrereq(self):
4977
    """Check prerequisites.
4978

4979
    This checks that the instance is in the cluster.
4980

4981
    """
4982
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4983
    assert self.instance is not None, \
4984
      "Cannot retrieve locked instance %s" % self.op.instance_name
4985
    _CheckNodeOnline(self, self.instance.primary_node)
4986

    
4987
  def Exec(self, feedback_fn):
4988
    """Activate the disks.
4989

4990
    """
4991
    disks_ok, disks_info = \
4992
              _AssembleInstanceDisks(self, self.instance,
4993
                                     ignore_size=self.op.ignore_size)
4994
    if not disks_ok:
4995
      raise errors.OpExecError("Cannot activate block devices")
4996

    
4997
    return disks_info
4998

    
4999

    
5000
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5001
                           ignore_size=False):
5002
  """Prepare the block devices for an instance.
5003

5004
  This sets up the block devices on all nodes.
5005

5006
  @type lu: L{LogicalUnit}
5007
  @param lu: the logical unit on whose behalf we execute
5008
  @type instance: L{objects.Instance}
5009
  @param instance: the instance for whose disks we assemble
5010
  @type disks: list of L{objects.Disk} or None
5011
  @param disks: which disks to assemble (or all, if None)
5012
  @type ignore_secondaries: boolean
5013
  @param ignore_secondaries: if true, errors on secondary nodes
5014
      won't result in an error return from the function
5015
  @type ignore_size: boolean
5016
  @param ignore_size: if true, the current known size of the disk
5017
      will not be used during the disk activation, useful for cases
5018
      when the size is wrong
5019
  @return: False if the operation failed, otherwise a list of
5020
      (host, instance_visible_name, node_visible_name)
5021
      with the mapping from node devices to instance devices
5022

5023
  """
5024
  device_info = []
5025
  disks_ok = True
5026
  iname = instance.name
5027
  disks = _ExpandCheckDisks(instance, disks)
5028

    
5029
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking has occurred, but we do not eliminate it
5032

    
5033
  # The proper fix would be to wait (with some limits) until the
5034
  # connection has been made and drbd transitions from WFConnection
5035
  # into any other network-connected state (Connected, SyncTarget,
5036
  # SyncSource, etc.)
5037

    
5038
  # 1st pass, assemble on all nodes in secondary mode
5039
  for idx, inst_disk in enumerate(disks):
5040
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5041
      if ignore_size:
5042
        node_disk = node_disk.Copy()
5043
        node_disk.UnsetSize()
5044
      lu.cfg.SetDiskID(node_disk, node)
5045
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5046
      msg = result.fail_msg
5047
      if msg:
5048
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5049
                           " (is_primary=False, pass=1): %s",
5050
                           inst_disk.iv_name, node, msg)
5051
        if not ignore_secondaries:
5052
          disks_ok = False
5053

    
5054
  # FIXME: race condition on drbd migration to primary
5055

    
5056
  # 2nd pass, do only the primary node
5057
  for idx, inst_disk in enumerate(disks):
5058
    dev_path = None
5059

    
5060
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5061
      if node != instance.primary_node:
5062
        continue
5063
      if ignore_size:
5064
        node_disk = node_disk.Copy()
5065
        node_disk.UnsetSize()
5066
      lu.cfg.SetDiskID(node_disk, node)
5067
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5068
      msg = result.fail_msg
5069
      if msg:
5070
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5071
                           " (is_primary=True, pass=2): %s",
5072
                           inst_disk.iv_name, node, msg)
5073
        disks_ok = False
5074
      else:
5075
        dev_path = result.payload
5076

    
5077
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5078

    
5079
  # leave the disks configured for the primary node
5080
  # this is a workaround that would be fixed better by
5081
  # improving the logical/physical id handling
5082
  for disk in disks:
5083
    lu.cfg.SetDiskID(disk, instance.primary_node)
5084

    
5085
  return disks_ok, device_info
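

# A minimal, purely illustrative sketch (not used above) of the two-pass order
# implemented in _AssembleInstanceDisks: bring every copy of a mirrored device
# up in secondary mode first, and only then activate the copy on the primary
# node, so that both sides exist before the promotion. The assemble_fn callback
# is a hypothetical stand-in for the per-node RPC call.
def _ExampleTwoPassAssemble(primary_node, nodes, assemble_fn):
  """Call C{assemble_fn(node, as_primary)} in the two-pass order used above.

  """
  for node in nodes:
    assemble_fn(node, False)       # 1st pass: secondary mode everywhere
  assemble_fn(primary_node, True)  # 2nd pass: only the primary is promoted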
5086

    
5087

    
5088
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


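# Illustrative usage: both _AssembleInstanceDisks and _ShutdownInstanceDisks
# funnel their optional "disks" argument through this helper, so passing
# disks=None means "act on all of the instance's disks":
#
#   disks = _ExpandCheckDisks(instance, None)                 # -> instance.disks
#   disks = _ExpandCheckDisks(instance, [instance.disks[0]])  # -> first disk only

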
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored (they make the function return False); if it is true, they
  are ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


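# Illustrative usage (mirroring the call in LUInstanceStartup.CheckPrereq
# below): before starting an instance, make sure its primary node can hold the
# instance's configured memory under its hypervisor:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)

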
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


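# Illustrative usage (hypothetical values): req_sizes maps a volume group name
# to the amount of space needed on it, in MiB, e.g. for two 10 GiB disks in a
# "xenvg" volume group on both candidate nodes:
#
#   _CheckNodesFreeDiskPerVG(self, [pnode.name, snode.name],
#                            {"xenvg": 2 * 10240})

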
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
5292
  """Starts an instance.
5293

5294
  """
5295
  HPATH = "instance-start"
5296
  HTYPE = constants.HTYPE_INSTANCE
5297
  REQ_BGL = False
5298

    
5299
  def CheckArguments(self):
5300
    # extra beparams
5301
    if self.op.beparams:
5302
      # fill the beparams dict
5303
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5304

    
5305
  def ExpandNames(self):
5306
    self._ExpandAndLockInstance()
5307

    
5308
  def BuildHooksEnv(self):
5309
    """Build hooks env.
5310

5311
    This runs on master, primary and secondary nodes of the instance.
5312

5313
    """
5314
    env = {
5315
      "FORCE": self.op.force,
5316
      }
5317

    
5318
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5319

    
5320
    return env
5321

    
5322
  def BuildHooksNodes(self):
5323
    """Build hooks nodes.
5324

5325
    """
5326
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5327
    return (nl, nl)
5328

    
5329
  def CheckPrereq(self):
5330
    """Check prerequisites.
5331

5332
    This checks that the instance is in the cluster.
5333

5334
    """
5335
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5336
    assert self.instance is not None, \
5337
      "Cannot retrieve locked instance %s" % self.op.instance_name
5338

    
5339
    # extra hvparams
5340
    if self.op.hvparams:
5341
      # check hypervisor parameter syntax (locally)
5342
      cluster = self.cfg.GetClusterInfo()
5343
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5344
      filled_hvp = cluster.FillHV(instance)
5345
      filled_hvp.update(self.op.hvparams)
5346
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5347
      hv_type.CheckParameterSyntax(filled_hvp)
5348
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5349

    
5350
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5351

    
5352
    if self.primary_offline and self.op.ignore_offline_nodes:
5353
      self.proc.LogWarning("Ignoring offline primary node")
5354

    
5355
      if self.op.hvparams or self.op.beparams:
5356
        self.proc.LogWarning("Overridden parameters are ignored")
5357
    else:
5358
      _CheckNodeOnline(self, instance.primary_node)
5359

    
5360
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5361

    
5362
      # check bridges existence
5363
      _CheckInstanceBridgesExist(self, instance)
5364

    
5365
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5366
                                                instance.name,
5367
                                                instance.hypervisor)
5368
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5369
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5370
      if not remote_info.payload: # not running already
5371
        _CheckNodeFreeMemory(self, instance.primary_node,
5372
                             "starting instance %s" % instance.name,
5373
                             bep[constants.BE_MEMORY], instance.hypervisor)
5374

    
5375
  def Exec(self, feedback_fn):
5376
    """Start the instance.
5377

5378
    """
5379
    instance = self.instance
5380
    force = self.op.force
5381

    
5382
    self.cfg.MarkInstanceUp(instance.name)
5383

    
5384
    if self.primary_offline:
5385
      assert self.op.ignore_offline_nodes
5386
      self.proc.LogInfo("Primary node offline, marked instance as started")
5387
    else:
5388
      node_current = instance.primary_node
5389

    
5390
      _StartInstanceDisks(self, instance, force)
5391

    
5392
      result = self.rpc.call_instance_start(node_current, instance,
5393
                                            self.op.hvparams, self.op.beparams)
5394
      msg = result.fail_msg
5395
      if msg:
5396
        _ShutdownInstanceDisks(self, instance)
5397
        raise errors.OpExecError("Could not start instance: %s" % msg)
5398

    
5399

    
5400
class LUInstanceReboot(LogicalUnit):
5401
  """Reboot an instance.
5402

5403
  """
5404
  HPATH = "instance-reboot"
5405
  HTYPE = constants.HTYPE_INSTANCE
5406
  REQ_BGL = False
5407

    
5408
  def ExpandNames(self):
5409
    self._ExpandAndLockInstance()
5410

    
5411
  def BuildHooksEnv(self):
5412
    """Build hooks env.
5413

5414
    This runs on master, primary and secondary nodes of the instance.
5415

5416
    """
5417
    env = {
5418
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5419
      "REBOOT_TYPE": self.op.reboot_type,
5420
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5421
      }
5422

    
5423
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5424

    
5425
    return env
5426

    
5427
  def BuildHooksNodes(self):
5428
    """Build hooks nodes.
5429

5430
    """
5431
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5432
    return (nl, nl)
5433

    
5434
  def CheckPrereq(self):
5435
    """Check prerequisites.
5436

5437
    This checks that the instance is in the cluster.
5438

5439
    """
5440
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5441
    assert self.instance is not None, \
5442
      "Cannot retrieve locked instance %s" % self.op.instance_name
5443

    
5444
    _CheckNodeOnline(self, instance.primary_node)
5445

    
5446
    # check bridges existence
5447
    _CheckInstanceBridgesExist(self, instance)
5448

    
5449
  def Exec(self, feedback_fn):
5450
    """Reboot the instance.
5451

5452
    """
5453
    instance = self.instance
5454
    ignore_secondaries = self.op.ignore_secondaries
5455
    reboot_type = self.op.reboot_type
5456

    
5457
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5458
                                              instance.name,
5459
                                              instance.hypervisor)
5460
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5461
    instance_running = bool(remote_info.payload)
5462

    
5463
    node_current = instance.primary_node
5464

    
5465
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5466
                                            constants.INSTANCE_REBOOT_HARD]:
5467
      for disk in instance.disks:
5468
        self.cfg.SetDiskID(disk, node_current)
5469
      result = self.rpc.call_instance_reboot(node_current, instance,
5470
                                             reboot_type,
5471
                                             self.op.shutdown_timeout)
5472
      result.Raise("Could not reboot instance")
5473
    else:
5474
      if instance_running:
5475
        result = self.rpc.call_instance_shutdown(node_current, instance,
5476
                                                 self.op.shutdown_timeout)
5477
        result.Raise("Could not shutdown instance for full reboot")
5478
        _ShutdownInstanceDisks(self, instance)
5479
      else:
5480
        self.LogInfo("Instance %s was already stopped, starting now",
5481
                     instance.name)
5482
      _StartInstanceDisks(self, instance, ignore_secondaries)
5483
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5484
      msg = result.fail_msg
5485
      if msg:
5486
        _ShutdownInstanceDisks(self, instance)
5487
        raise errors.OpExecError("Could not start instance for"
5488
                                 " full reboot: %s" % msg)
5489

    
5490
    self.cfg.MarkInstanceUp(instance.name)
5491

    
5492

    
5493
class LUInstanceShutdown(LogicalUnit):
5494
  """Shutdown an instance.
5495

5496
  """
5497
  HPATH = "instance-stop"
5498
  HTYPE = constants.HTYPE_INSTANCE
5499
  REQ_BGL = False
5500

    
5501
  def ExpandNames(self):
5502
    self._ExpandAndLockInstance()
5503

    
5504
  def BuildHooksEnv(self):
5505
    """Build hooks env.
5506

5507
    This runs on master, primary and secondary nodes of the instance.
5508

5509
    """
5510
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5511
    env["TIMEOUT"] = self.op.timeout
5512
    return env
5513

    
5514
  def BuildHooksNodes(self):
5515
    """Build hooks nodes.
5516

5517
    """
5518
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5519
    return (nl, nl)
5520

    
5521
  def CheckPrereq(self):
5522
    """Check prerequisites.
5523

5524
    This checks that the instance is in the cluster.
5525

5526
    """
5527
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5528
    assert self.instance is not None, \
5529
      "Cannot retrieve locked instance %s" % self.op.instance_name
5530

    
5531
    self.primary_offline = \
5532
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5533

    
5534
    if self.primary_offline and self.op.ignore_offline_nodes:
5535
      self.proc.LogWarning("Ignoring offline primary node")
5536
    else:
5537
      _CheckNodeOnline(self, self.instance.primary_node)
5538

    
5539
  def Exec(self, feedback_fn):
5540
    """Shutdown the instance.
5541

5542
    """
5543
    instance = self.instance
5544
    node_current = instance.primary_node
5545
    timeout = self.op.timeout
5546

    
5547
    self.cfg.MarkInstanceDown(instance.name)
5548

    
5549
    if self.primary_offline:
5550
      assert self.op.ignore_offline_nodes
5551
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5552
    else:
5553
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5554
      msg = result.fail_msg
5555
      if msg:
5556
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5557

    
5558
      _ShutdownInstanceDisks(self, instance)
5559

    
5560

    
5561
class LUInstanceReinstall(LogicalUnit):
5562
  """Reinstall an instance.
5563

5564
  """
5565
  HPATH = "instance-reinstall"
5566
  HTYPE = constants.HTYPE_INSTANCE
5567
  REQ_BGL = False
5568

    
5569
  def ExpandNames(self):
5570
    self._ExpandAndLockInstance()
5571

    
5572
  def BuildHooksEnv(self):
5573
    """Build hooks env.
5574

5575
    This runs on master, primary and secondary nodes of the instance.
5576

5577
    """
5578
    return _BuildInstanceHookEnvByObject(self, self.instance)
5579

    
5580
  def BuildHooksNodes(self):
5581
    """Build hooks nodes.
5582

5583
    """
5584
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5585
    return (nl, nl)
5586

    
5587
  def CheckPrereq(self):
5588
    """Check prerequisites.
5589

5590
    This checks that the instance is in the cluster and is not running.
5591

5592
    """
5593
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5594
    assert instance is not None, \
5595
      "Cannot retrieve locked instance %s" % self.op.instance_name
5596
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5597
                     " offline, cannot reinstall")
5598
    for node in instance.secondary_nodes:
5599
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5600
                       " cannot reinstall")
5601

    
5602
    if instance.disk_template == constants.DT_DISKLESS:
5603
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5604
                                 self.op.instance_name,
5605
                                 errors.ECODE_INVAL)
5606
    _CheckInstanceDown(self, instance, "cannot reinstall")
5607

    
5608
    if self.op.os_type is not None:
5609
      # OS verification
5610
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5611
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5612
      instance_os = self.op.os_type
5613
    else:
5614
      instance_os = instance.os
5615

    
5616
    nodelist = list(instance.all_nodes)
5617

    
5618
    if self.op.osparams:
5619
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5620
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5621
      self.os_inst = i_osdict # the new dict (without defaults)
5622
    else:
5623
      self.os_inst = None
5624

    
5625
    self.instance = instance
5626

    
5627
  def Exec(self, feedback_fn):
5628
    """Reinstall the instance.
5629

5630
    """
5631
    inst = self.instance
5632

    
5633
    if self.op.os_type is not None:
5634
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5635
      inst.os = self.op.os_type
5636
      # Write to configuration
5637
      self.cfg.Update(inst, feedback_fn)
5638

    
5639
    _StartInstanceDisks(self, inst, None)
5640
    try:
5641
      feedback_fn("Running the instance OS create scripts...")
5642
      # FIXME: pass debug option from opcode to backend
5643
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5644
                                             self.op.debug_level,
5645
                                             osparams=self.os_inst)
5646
      result.Raise("Could not install OS for instance %s on node %s" %
5647
                   (inst.name, inst.primary_node))
5648
    finally:
5649
      _ShutdownInstanceDisks(self, inst)
5650

    
5651

    
5652
class LUInstanceRecreateDisks(LogicalUnit):
5653
  """Recreate an instance's missing disks.
5654

5655
  """
5656
  HPATH = "instance-recreate-disks"
5657
  HTYPE = constants.HTYPE_INSTANCE
5658
  REQ_BGL = False
5659

    
5660
  def ExpandNames(self):
5661
    self._ExpandAndLockInstance()
5662

    
5663
  def BuildHooksEnv(self):
5664
    """Build hooks env.
5665

5666
    This runs on master, primary and secondary nodes of the instance.
5667

5668
    """
5669
    return _BuildInstanceHookEnvByObject(self, self.instance)
5670

    
5671
  def BuildHooksNodes(self):
5672
    """Build hooks nodes.
5673

5674
    """
5675
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5676
    return (nl, nl)
5677

    
5678
  def CheckPrereq(self):
5679
    """Check prerequisites.
5680

5681
    This checks that the instance is in the cluster and is not running.
5682

5683
    """
5684
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5685
    assert instance is not None, \
5686
      "Cannot retrieve locked instance %s" % self.op.instance_name
5687
    _CheckNodeOnline(self, instance.primary_node)
5688

    
5689
    if instance.disk_template == constants.DT_DISKLESS:
5690
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5691
                                 self.op.instance_name, errors.ECODE_INVAL)
5692
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5693

    
5694
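    # An empty disk list means "recreate all disks"; otherwise the given
    # indices are validated against the instance's current disks.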
    if not self.op.disks:
5695
      self.op.disks = range(len(instance.disks))
5696
    else:
5697
      for idx in self.op.disks:
5698
        if idx >= len(instance.disks):
5699
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5700
                                     errors.ECODE_INVAL)
5701

    
5702
    self.instance = instance
5703

    
5704
  def Exec(self, feedback_fn):
5705
    """Recreate the disks.
5706

5707
    """
5708
    to_skip = []
5709
    for idx, _ in enumerate(self.instance.disks):
5710
      if idx not in self.op.disks: # disk idx has not been passed in
5711
        to_skip.append(idx)
5712
        continue
5713

    
5714
    _CreateDisks(self, self.instance, to_skip=to_skip)
5715

    
5716

    
5717
class LUInstanceRename(LogicalUnit):
5718
  """Rename an instance.
5719

5720
  """
5721
  HPATH = "instance-rename"
5722
  HTYPE = constants.HTYPE_INSTANCE
5723

    
5724
  def CheckArguments(self):
5725
    """Check arguments.
5726

5727
    """
5728
    if self.op.ip_check and not self.op.name_check:
5729
      # TODO: make the ip check more flexible and not depend on the name check
5730
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5731
                                 errors.ECODE_INVAL)
5732

    
5733
  def BuildHooksEnv(self):
5734
    """Build hooks env.
5735

5736
    This runs on master, primary and secondary nodes of the instance.
5737

5738
    """
5739
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5740
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5741
    return env
5742

    
5743
  def BuildHooksNodes(self):
5744
    """Build hooks nodes.
5745

5746
    """
5747
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5748
    return (nl, nl)
5749

    
5750
  def CheckPrereq(self):
5751
    """Check prerequisites.
5752

5753
    This checks that the instance is in the cluster and is not running.
5754

5755
    """
5756
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5757
                                                self.op.instance_name)
5758
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5759
    assert instance is not None
5760
    _CheckNodeOnline(self, instance.primary_node)
5761
    _CheckInstanceDown(self, instance, "cannot rename")
5762
    self.instance = instance
5763

    
5764
    new_name = self.op.new_name
5765
    if self.op.name_check:
5766
      hostname = netutils.GetHostname(name=new_name)
5767
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5768
                   hostname.name)
5769
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5770
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5771
                                    " same as given hostname '%s'") %
5772
                                    (hostname.name, self.op.new_name),
5773
                                    errors.ECODE_INVAL)
5774
      new_name = self.op.new_name = hostname.name
5775
      if (self.op.ip_check and
5776
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5777
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5778
                                   (hostname.ip, new_name),
5779
                                   errors.ECODE_NOTUNIQUE)
5780

    
5781
    instance_list = self.cfg.GetInstanceList()
5782
    if new_name in instance_list and new_name != instance.name:
5783
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5784
                                 new_name, errors.ECODE_EXISTS)
5785

    
5786
  def Exec(self, feedback_fn):
5787
    """Rename the instance.
5788

5789
    """
5790
    inst = self.instance
5791
    old_name = inst.name
5792

    
5793
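    # For file-based disk templates the storage directory normally contains
    # the instance name, so a rename also means renaming that directory on
    # the primary node (handled below via rpc.call_file_storage_dir_rename).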
    rename_file_storage = False
5794
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5795
        self.op.new_name != inst.name):
5796
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5797
      rename_file_storage = True
5798

    
5799
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5800
    # Change the instance lock. This is definitely safe while we hold the BGL.
5801
    # Otherwise the new lock would have to be added in acquired mode.
5802
    assert self.REQ_BGL
5803
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5804
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5805

    
5806
    # re-read the instance from the configuration after rename
5807
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5808

    
5809
    if rename_file_storage:
5810
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5811
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5812
                                                     old_file_storage_dir,
5813
                                                     new_file_storage_dir)
5814
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5815
                   " (but the instance has been renamed in Ganeti)" %
5816
                   (inst.primary_node, old_file_storage_dir,
5817
                    new_file_storage_dir))
5818

    
5819
    _StartInstanceDisks(self, inst, None)
5820
    try:
5821
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5822
                                                 old_name, self.op.debug_level)
5823
      msg = result.fail_msg
5824
      if msg:
5825
        msg = ("Could not run OS rename script for instance %s on node %s"
5826
               " (but the instance has been renamed in Ganeti): %s" %
5827
               (inst.name, inst.primary_node, msg))
5828
        self.proc.LogWarning(msg)
5829
    finally:
5830
      _ShutdownInstanceDisks(self, inst)
5831

    
5832
    return inst.name
5833

    
5834

    
5835
class LUInstanceRemove(LogicalUnit):
5836
  """Remove an instance.
5837

5838
  """
5839
  HPATH = "instance-remove"
5840
  HTYPE = constants.HTYPE_INSTANCE
5841
  REQ_BGL = False
5842

    
5843
  def ExpandNames(self):
5844
    self._ExpandAndLockInstance()
5845
    self.needed_locks[locking.LEVEL_NODE] = []
5846
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5847

    
5848
  def DeclareLocks(self, level):
5849
    if level == locking.LEVEL_NODE:
5850
      self._LockInstancesNodes()
5851

    
5852
  def BuildHooksEnv(self):
5853
    """Build hooks env.
5854

5855
    This runs on master, primary and secondary nodes of the instance.
5856

5857
    """
5858
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5859
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5860
    return env
5861

    
5862
  def BuildHooksNodes(self):
5863
    """Build hooks nodes.
5864

5865
    """
5866
    nl = [self.cfg.GetMasterNode()]
5867
    nl_post = list(self.instance.all_nodes) + nl
5868
    return (nl, nl_post)
5869

    
5870
  def CheckPrereq(self):
5871
    """Check prerequisites.
5872

5873
    This checks that the instance is in the cluster.
5874

5875
    """
5876
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5877
    assert self.instance is not None, \
5878
      "Cannot retrieve locked instance %s" % self.op.instance_name
5879

    
5880
  def Exec(self, feedback_fn):
5881
    """Remove the instance.
5882

5883
    """
5884
    instance = self.instance
5885
    logging.info("Shutting down instance %s on node %s",
5886
                 instance.name, instance.primary_node)
5887

    
5888
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5889
                                             self.op.shutdown_timeout)
5890
    msg = result.fail_msg
5891
    if msg:
5892
      if self.op.ignore_failures:
5893
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5894
      else:
5895
        raise errors.OpExecError("Could not shutdown instance %s on"
5896
                                 " node %s: %s" %
5897
                                 (instance.name, instance.primary_node, msg))
5898

    
5899
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5900

    
5901

    
5902
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5903
  """Utility function to remove an instance.
5904

5905
  """
5906
  logging.info("Removing block devices for instance %s", instance.name)
5907

    
5908
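  # A disk removal failure aborts the operation unless ignore_failures is
  # set, in which case we only warn and still drop the instance from the
  # cluster configuration below.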
  if not _RemoveDisks(lu, instance):
5909
    if not ignore_failures:
5910
      raise errors.OpExecError("Can't remove instance's disks")
5911
    feedback_fn("Warning: can't remove instance's disks")
5912

    
5913
  logging.info("Removing instance %s out of cluster config", instance.name)
5914

    
5915
  lu.cfg.RemoveInstance(instance.name)
5916

    
5917
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5918
    "Instance lock removal conflict"
5919

    
5920
  # Remove lock for the instance
5921
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5922

    
5923

    
5924
class LUInstanceQuery(NoHooksLU):
5925
  """Logical unit for querying instances.
5926

5927
  """
5928
  # pylint: disable-msg=W0142
5929
  REQ_BGL = False
5930

    
5931
  def CheckArguments(self):
5932
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5933
                             self.op.output_fields, self.op.use_locking)
5934

    
5935
  def ExpandNames(self):
5936
    self.iq.ExpandNames(self)
5937

    
5938
  def DeclareLocks(self, level):
5939
    self.iq.DeclareLocks(self, level)
5940

    
5941
  def Exec(self, feedback_fn):
5942
    return self.iq.OldStyleQuery(self)
5943

    
5944

    
5945
class LUInstanceFailover(LogicalUnit):
5946
  """Failover an instance.
5947

5948
  """
5949
  HPATH = "instance-failover"
5950
  HTYPE = constants.HTYPE_INSTANCE
5951
  REQ_BGL = False
5952

    
5953
  def CheckArguments(self):
5954
    """Check the arguments.
5955

5956
    """
5957
    self.iallocator = getattr(self.op, "iallocator", None)
5958
    self.target_node = getattr(self.op, "target_node", None)
5959

    
5960
  def ExpandNames(self):
5961
    self._ExpandAndLockInstance()
5962

    
5963
    if self.op.target_node is not None:
5964
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5965

    
5966
    self.needed_locks[locking.LEVEL_NODE] = []
5967
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5968

    
5969
    ignore_consistency = self.op.ignore_consistency
5970
    shutdown_timeout = self.op.shutdown_timeout
5971
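    # Failover is implemented as the failover mode of the shared migration
    # tasklet, so both operations go through TLMigrateInstance.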
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5972
                                       cleanup=False,
5973
                                       iallocator=self.op.iallocator,
5974
                                       target_node=self.op.target_node,
5975
                                       failover=True,
5976
                                       ignore_consistency=ignore_consistency,
5977
                                       shutdown_timeout=shutdown_timeout)
5978
    self.tasklets = [self._migrater]
5979

    
5980
  def DeclareLocks(self, level):
5981
    if level == locking.LEVEL_NODE:
5982
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5983
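      # Externally mirrored disks can be failed over to any node; without an
      # explicit target we must lock all nodes so the iallocator can choose
      # one later.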
      if instance.disk_template in constants.DTS_EXT_MIRROR:
5984
        if self.op.target_node is None:
5985
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5986
        else:
5987
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5988
                                                   self.op.target_node]
5989
        del self.recalculate_locks[locking.LEVEL_NODE]
5990
      else:
5991
        self._LockInstancesNodes()
5992

    
5993
  def BuildHooksEnv(self):
5994
    """Build hooks env.
5995

5996
    This runs on master, primary and secondary nodes of the instance.
5997

5998
    """
5999
    instance = self._migrater.instance
6000
    source_node = instance.primary_node
6001
    target_node = self._migrater.target_node
6002
    env = {
6003
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6004
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6005
      "OLD_PRIMARY": source_node,
6006
      "NEW_PRIMARY": target_node,
6007
      }
6008

    
6009
    if instance.disk_template in constants.DTS_INT_MIRROR:
6010
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6011
      env["NEW_SECONDARY"] = source_node
6012
    else:
6013
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6014

    
6015
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6016

    
6017
    return env
6018

    
6019
  def BuildHooksNodes(self):
6020
    """Build hooks nodes.
6021

6022
    """
6023
    instance = self._migrater.instance
6024
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6025
    return (nl, nl + [instance.primary_node])
6026

    
6027

    
6028
class LUInstanceMigrate(LogicalUnit):
6029
  """Migrate an instance.
6030

6031
  This is migration without shutting down, compared to the failover,
6032
  which is done with shutdown.
6033

6034
  """
6035
  HPATH = "instance-migrate"
6036
  HTYPE = constants.HTYPE_INSTANCE
6037
  REQ_BGL = False
6038

    
6039
  def ExpandNames(self):
6040
    self._ExpandAndLockInstance()
6041

    
6042
    if self.op.target_node is not None:
6043
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6044

    
6045
    self.needed_locks[locking.LEVEL_NODE] = []
6046
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6047

    
6048
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6049
                                       cleanup=self.op.cleanup,
6050
                                       iallocator=self.op.iallocator,
6051
                                       target_node=self.op.target_node,
6052
                                       failover=False,
6053
                                       fallback=self.op.allow_failover)
6054
    self.tasklets = [self._migrater]
6055

    
6056
  def DeclareLocks(self, level):
6057
    if level == locking.LEVEL_NODE:
6058
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6059
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6060
        if self.op.target_node is None:
6061
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6062
        else:
6063
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6064
                                                   self.op.target_node]
6065
        del self.recalculate_locks[locking.LEVEL_NODE]
6066
      else:
6067
        self._LockInstancesNodes()
6068

    
6069
  def BuildHooksEnv(self):
6070
    """Build hooks env.
6071

6072
    This runs on master, primary and secondary nodes of the instance.
6073

6074
    """
6075
    instance = self._migrater.instance
6076
    source_node = instance.primary_node
6077
    target_node = self._migrater.target_node
6078
    env = _BuildInstanceHookEnvByObject(self, instance)
6079
    env.update({
6080
      "MIGRATE_LIVE": self._migrater.live,
6081
      "MIGRATE_CLEANUP": self.op.cleanup,
6082
      "OLD_PRIMARY": source_node,
6083
      "NEW_PRIMARY": target_node,
6084
      })
6085

    
6086
    if instance.disk_template in constants.DTS_INT_MIRROR:
6087
      env["OLD_SECONDARY"] = target_node
6088
      env["NEW_SECONDARY"] = source_node
6089
    else:
6090
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6091

    
6092
    return env
6093

    
6094
  def BuildHooksNodes(self):
6095
    """Build hooks nodes.
6096

6097
    """
6098
    instance = self._migrater.instance
6099
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6100
    return (nl, nl + [instance.primary_node])
6101

    
6102

    
6103
class LUInstanceMove(LogicalUnit):
6104
  """Move an instance by data-copying.
6105

6106
  """
6107
  HPATH = "instance-move"
6108
  HTYPE = constants.HTYPE_INSTANCE
6109
  REQ_BGL = False
6110

    
6111
  def ExpandNames(self):
6112
    self._ExpandAndLockInstance()
6113
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6114
    self.op.target_node = target_node
6115
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6116
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6117

    
6118
  def DeclareLocks(self, level):
6119
    if level == locking.LEVEL_NODE:
6120
      self._LockInstancesNodes(primary_only=True)
6121

    
6122
  def BuildHooksEnv(self):
6123
    """Build hooks env.
6124

6125
    This runs on master, primary and secondary nodes of the instance.
6126

6127
    """
6128
    env = {
6129
      "TARGET_NODE": self.op.target_node,
6130
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6131
      }
6132
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6133
    return env
6134

    
6135
  def BuildHooksNodes(self):
6136
    """Build hooks nodes.
6137

6138
    """
6139
    nl = [
6140
      self.cfg.GetMasterNode(),
6141
      self.instance.primary_node,
6142
      self.op.target_node,
6143
      ]
6144
    return (nl, nl)
6145

    
6146
  def CheckPrereq(self):
6147
    """Check prerequisites.
6148

6149
    This checks that the instance is in the cluster.
6150

6151
    """
6152
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6153
    assert self.instance is not None, \
6154
      "Cannot retrieve locked instance %s" % self.op.instance_name
6155

    
6156
    node = self.cfg.GetNodeInfo(self.op.target_node)
6157
    assert node is not None, \
6158
      "Cannot retrieve locked node %s" % self.op.target_node
6159

    
6160
    self.target_node = target_node = node.name
6161

    
6162
    if target_node == instance.primary_node:
6163
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6164
                                 (instance.name, target_node),
6165
                                 errors.ECODE_STATE)
6166

    
6167
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6168

    
6169
    for idx, dsk in enumerate(instance.disks):
6170
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6171
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6172
                                   " cannot copy" % idx, errors.ECODE_STATE)
6173

    
6174
    _CheckNodeOnline(self, target_node)
6175
    _CheckNodeNotDrained(self, target_node)
6176
    _CheckNodeVmCapable(self, target_node)
6177

    
6178
    if instance.admin_up:
6179
      # check memory requirements on the target node
6180
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6181
                           instance.name, bep[constants.BE_MEMORY],
6182
                           instance.hypervisor)
6183
    else:
6184
      self.LogInfo("Not checking memory on the secondary node as"
6185
                   " instance will not be started")
6186

    
6187
    # check bridge existence
6188
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6189

    
6190
  def Exec(self, feedback_fn):
6191
    """Move an instance.
6192

6193
    The move is done by shutting it down on its present node, copying
6194
    the data over (slow) and starting it on the new node.
6195

6196
    """
6197
    instance = self.instance
6198

    
6199
    source_node = instance.primary_node
6200
    target_node = self.target_node
6201

    
6202
    self.LogInfo("Shutting down instance %s on source node %s",
6203
                 instance.name, source_node)
6204

    
6205
    result = self.rpc.call_instance_shutdown(source_node, instance,
6206
                                             self.op.shutdown_timeout)
6207
    msg = result.fail_msg
6208
    if msg:
6209
      if self.op.ignore_consistency:
6210
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6211
                             " Proceeding anyway. Please make sure node"
6212
                             " %s is down. Error details: %s",
6213
                             instance.name, source_node, source_node, msg)
6214
      else:
6215
        raise errors.OpExecError("Could not shutdown instance %s on"
6216
                                 " node %s: %s" %
6217
                                 (instance.name, source_node, msg))
6218

    
6219
    # create the target disks
6220
    try:
6221
      _CreateDisks(self, instance, target_node=target_node)
6222
    except errors.OpExecError:
6223
      self.LogWarning("Device creation failed, reverting...")
6224
      try:
6225
        _RemoveDisks(self, instance, target_node=target_node)
6226
      finally:
6227
        self.cfg.ReleaseDRBDMinors(instance.name)
6228
        raise
6229

    
6230
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6231

    
6232
    errs = []
6233
    # activate, get path, copy the data over
6234
    for idx, disk in enumerate(instance.disks):
6235
      self.LogInfo("Copying data for disk %d", idx)
6236
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6237
                                               instance.name, True, idx)
6238
      if result.fail_msg:
6239
        self.LogWarning("Can't assemble newly created disk %d: %s",
6240
                        idx, result.fail_msg)
6241
        errs.append(result.fail_msg)
6242
        break
6243
      dev_path = result.payload
6244
      result = self.rpc.call_blockdev_export(source_node, disk,
6245
                                             target_node, dev_path,
6246
                                             cluster_name)
6247
      if result.fail_msg:
6248
        self.LogWarning("Can't copy data over for disk %d: %s",
6249
                        idx, result.fail_msg)
6250
        errs.append(result.fail_msg)
6251
        break
6252

    
6253
    if errs:
6254
      self.LogWarning("Some disks failed to copy, aborting")
6255
      try:
6256
        _RemoveDisks(self, instance, target_node=target_node)
6257
      finally:
6258
        self.cfg.ReleaseDRBDMinors(instance.name)
6259
        raise errors.OpExecError("Errors during disk copy: %s" %
6260
                                 (",".join(errs),))
6261

    
6262
    instance.primary_node = target_node
6263
    self.cfg.Update(instance, feedback_fn)
6264

    
6265
    self.LogInfo("Removing the disks on the original node")
6266
    _RemoveDisks(self, instance, target_node=source_node)
6267

    
6268
    # Only start the instance if it's marked as up
6269
    if instance.admin_up:
6270
      self.LogInfo("Starting instance %s on node %s",
6271
                   instance.name, target_node)
6272

    
6273
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6274
                                           ignore_secondaries=True)
6275
      if not disks_ok:
6276
        _ShutdownInstanceDisks(self, instance)
6277
        raise errors.OpExecError("Can't activate the instance's disks")
6278

    
6279
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6280
      msg = result.fail_msg
6281
      if msg:
6282
        _ShutdownInstanceDisks(self, instance)
6283
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6284
                                 (instance.name, target_node, msg))
6285

    
6286

    
6287
class LUNodeMigrate(LogicalUnit):
6288
  """Migrate all instances from a node.
6289

6290
  """
6291
  HPATH = "node-migrate"
6292
  HTYPE = constants.HTYPE_NODE
6293
  REQ_BGL = False
6294

    
6295
  def CheckArguments(self):
6296
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6297

    
6298
  def ExpandNames(self):
6299
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6300

    
6301
    self.needed_locks = {}
6302

    
6303
    # Create tasklets for migrating instances for all instances on this node
6304
    names = []
6305
    tasklets = []
6306

    
6307
    self.lock_all_nodes = False
6308

    
6309
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6310
      logging.debug("Migrating instance %s", inst.name)
6311
      names.append(inst.name)
6312

    
6313
      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
                                        iallocator=self.op.iallocator,
                                        target_node=None))
6316

    
6317
      if inst.disk_template in constants.DTS_EXT_MIRROR:
6318
        # We need to lock all nodes, as the iallocator will choose the
6319
        # destination nodes afterwards
6320
        self.lock_all_nodes = True
6321

    
6322
    self.tasklets = tasklets
6323

    
6324
    # Declare node locks
6325
    if self.lock_all_nodes:
6326
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6327
    else:
6328
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6329
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6330

    
6331
    # Declare instance locks
6332
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6333

    
6334
  def DeclareLocks(self, level):
6335
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6336
      self._LockInstancesNodes()
6337

    
6338
  def BuildHooksEnv(self):
6339
    """Build hooks env.
6340

6341
    This runs on the master, the primary and all the secondaries.
6342

6343
    """
6344
    return {
6345
      "NODE_NAME": self.op.node_name,
6346
      }
6347

    
6348
  def BuildHooksNodes(self):
6349
    """Build hooks nodes.
6350

6351
    """
6352
    nl = [self.cfg.GetMasterNode()]
6353
    return (nl, nl)
6354

    
6355

    
6356
class TLMigrateInstance(Tasklet):
6357
  """Tasklet class for instance migration.
6358

6359
  @type live: boolean
6360
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we clean up from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether the operation results in a failover or a migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration is not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between
                            source and target node
6376
  @type shutdown_timeout: int
6377
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
6378

6379
  """
6380
  def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6381
               target_node=None, failover=False, fallback=False,
6382
               ignore_consistency=False,
6383
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6384
    """Initializes this class.
6385

6386
    """
6387
    Tasklet.__init__(self, lu)
6388

    
6389
    # Parameters
6390
    self.instance_name = instance_name
6391
    self.cleanup = cleanup
6392
    self.live = False # will be overridden later
6393
    self.iallocator = iallocator
6394
    self.target_node = target_node
6395
    self.failover = failover
6396
    self.fallback = fallback
6397
    self.ignore_consistency = ignore_consistency
6398
    self.shutdown_timeout = shutdown_timeout
6399

    
6400
  def CheckPrereq(self):
6401
    """Check prerequisites.
6402

6403
    This checks that the instance is in the cluster.
6404

6405
    """
6406
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6407
    instance = self.cfg.GetInstanceInfo(instance_name)
6408
    assert instance is not None
6409
    self.instance = instance
6410

    
6411
    if (not self.cleanup and not instance.admin_up and not self.failover and
6412
        self.fallback):
6413
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6414
                      " to failover")
6415
      self.failover = True
6416

    
6417
    if instance.disk_template not in constants.DTS_MIRRORED:
6418
      if self.failover:
6419
        text = "failovers"
6420
      else:
6421
        text = "migrations"
6422
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6423
                                 " %s" % (instance.disk_template, text),
6424
                                 errors.ECODE_STATE)
6425

    
6426
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6427
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6428

    
6429
      if self.iallocator:
6430
        self._RunAllocator()
6431

    
6432
      # self.target_node is already populated, either directly or by the
6433
      # iallocator run
6434
      target_node = self.target_node
6435

    
6436
      if len(self.lu.tasklets) == 1:
6437
        # It is safe to remove locks only when we're the only tasklet in the LU
6438
        nodes_keep = [instance.primary_node, self.target_node]
6439
        nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6440
                     if node not in nodes_keep]
6441
        self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6442
        self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6443

    
6444
    else:
6445
      secondary_nodes = instance.secondary_nodes
6446
      if not secondary_nodes:
6447
        raise errors.ConfigurationError("No secondary node but using"
6448
                                        " %s disk template" %
6449
                                        instance.disk_template)
6450
      target_node = secondary_nodes[0]
6451
      if self.iallocator or (self.target_node and
6452
                             self.target_node != target_node):
6453
        if self.failover:
6454
          text = "failed over"
6455
        else:
6456
          text = "migrated"
6457
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6458
                                   " be %s to arbitrary nodes"
6459
                                   " (neither an iallocator nor a target"
6460
                                   " node can be passed)" %
6461
                                   (instance.disk_template, text),
6462
                                   errors.ECODE_INVAL)
6463

    
6464
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6465

    
6466
    # check memory requirements on the secondary node
6467
    if not self.failover or instance.admin_up:
6468
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6469
                           instance.name, i_be[constants.BE_MEMORY],
6470
                           instance.hypervisor)
6471
    else:
6472
      self.lu.LogInfo("Not checking memory on the secondary node as"
6473
                      " instance will not be started")
6474

    
6475
    # check bridge existence
6476
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6477

    
6478
    if not self.cleanup:
6479
      _CheckNodeNotDrained(self.lu, target_node)
6480
      if not self.failover:
6481
        result = self.rpc.call_instance_migratable(instance.primary_node,
6482
                                                   instance)
6483
        if result.fail_msg and self.fallback:
6484
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6485
                          " failover")
6486
          self.failover = True
6487
        else:
6488
          result.Raise("Can't migrate, please use failover",
6489
                       prereq=True, ecode=errors.ECODE_STATE)
6490

    
6491
    assert not (self.failover and self.cleanup)
6492

    
6493
  def _RunAllocator(self):
6494
    """Run the allocator based on input opcode.
6495

6496
    """
6497
    ial = IAllocator(self.cfg, self.rpc,
6498
                     mode=constants.IALLOCATOR_MODE_RELOC,
6499
                     name=self.instance_name,
6500
                     # TODO See why hail breaks with a single node below
6501
                     relocate_from=[self.instance.primary_node,
6502
                                    self.instance.primary_node],
6503
                     )
6504

    
6505
    ial.Run(self.iallocator)
6506

    
6507
    if not ial.success:
6508
      raise errors.OpPrereqError("Can't compute nodes using"
6509
                                 " iallocator '%s': %s" %
6510
                                 (self.iallocator, ial.info),
6511
                                 errors.ECODE_NORES)
6512
    if len(ial.result) != ial.required_nodes:
6513
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6514
                                 " of nodes (%s), required %s" %
6515
                                 (self.iallocator, len(ial.result),
6516
                                  ial.required_nodes), errors.ECODE_FAULT)
6517
    self.target_node = ial.result[0]
6518
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6519
                 self.instance_name, self.iallocator,
6520
                 utils.CommaJoin(ial.result))
6521

    
6522
    if not self.failover:
6523
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6524
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6525
                                   " parameters are accepted",
6526
                                   errors.ECODE_INVAL)
6527
      if self.lu.op.live is not None:
6528
        if self.lu.op.live:
6529
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6530
        else:
6531
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6532
        # reset the 'live' parameter to None so that repeated
6533
        # invocations of CheckPrereq do not raise an exception
6534
        self.lu.op.live = None
6535
      elif self.lu.op.mode is None:
6536
        # read the default value from the hypervisor
6537
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6538
                                                skip_globals=False)
6539
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6540

    
6541
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6542
    else:
6543
      # Failover is never live
6544
      self.live = False
6545

    
6546
  def _WaitUntilSync(self):
6547
    """Poll with custom rpc for disk sync.
6548

6549
    This uses our own step-based rpc call.
6550

6551
    """
6552
    self.feedback_fn("* wait until resync is done")
6553
    all_done = False
6554
    while not all_done:
6555
      all_done = True
6556
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6557
                                            self.nodes_ip,
6558
                                            self.instance.disks)
6559
      min_percent = 100
6560
      for node, nres in result.items():
6561
        nres.Raise("Cannot resync disks on node %s" % node)
6562
        node_done, node_percent = nres.payload
6563
        all_done = all_done and node_done
6564
        if node_percent is not None:
6565
          min_percent = min(min_percent, node_percent)
6566
      if not all_done:
6567
        if min_percent < 100:
6568
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6569
        time.sleep(2)
6570

    
6571
  def _EnsureSecondary(self, node):
6572
    """Demote a node to secondary.
6573

6574
    """
6575
    self.feedback_fn("* switching node %s to secondary mode" % node)
6576

    
6577
    for dev in self.instance.disks:
6578
      self.cfg.SetDiskID(dev, node)
6579

    
6580
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6581
                                          self.instance.disks)
6582
    result.Raise("Cannot change disk to secondary on node %s" % node)
6583

    
6584
  def _GoStandalone(self):
6585
    """Disconnect from the network.
6586

6587
    """
6588
    self.feedback_fn("* changing into standalone mode")
6589
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6590
                                               self.instance.disks)
6591
    for node, nres in result.items():
6592
      nres.Raise("Cannot disconnect disks node %s" % node)
6593

    
6594
  def _GoReconnect(self, multimaster):
6595
    """Reconnect to the network.
6596

6597
    """
6598
    if multimaster:
6599
      msg = "dual-master"
6600
    else:
6601
      msg = "single-master"
6602
    self.feedback_fn("* changing disks into %s mode" % msg)
6603
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6604
                                           self.instance.disks,
6605
                                           self.instance.name, multimaster)
6606
    for node, nres in result.items():
6607
      nres.Raise("Cannot change disks config on node %s" % node)
6608

    
6609
  def _ExecCleanup(self):
6610
    """Try to cleanup after a failed migration.
6611

6612
    The cleanup is done by:
6613
      - check that the instance is running only on one node
6614
        (and update the config if needed)
6615
      - change disks on its secondary node to secondary
6616
      - wait until disks are fully synchronized
6617
      - disconnect from the network
6618
      - change disks into single-master mode
6619
      - wait again until disks are fully synchronized
6620

6621
    """
6622
    instance = self.instance
6623
    target_node = self.target_node
6624
    source_node = self.source_node
6625

    
6626
    # check running on only one node
6627
    self.feedback_fn("* checking where the instance actually runs"
6628
                     " (if this hangs, the hypervisor might be in"
6629
                     " a bad state)")
6630
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6631
    for node, result in ins_l.items():
6632
      result.Raise("Can't contact node %s" % node)
6633

    
6634
    runningon_source = instance.name in ins_l[source_node].payload
6635
    runningon_target = instance.name in ins_l[target_node].payload
6636

    
6637
    if runningon_source and runningon_target:
6638
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6639
                               " or the hypervisor is confused. You will have"
6640
                               " to ensure manually that it runs only on one"
6641
                               " and restart this operation.")
6642

    
6643
    if not (runningon_source or runningon_target):
6644
      raise errors.OpExecError("Instance does not seem to be running at all."
6645
                               " In this case, it's safer to repair by"
6646
                               " running 'gnt-instance stop' to ensure disk"
6647
                               " shutdown, and then restarting it.")
6648

    
6649
    if runningon_target:
6650
      # the migration has actually succeeded, we need to update the config
6651
      self.feedback_fn("* instance running on secondary node (%s),"
6652
                       " updating config" % target_node)
6653
      instance.primary_node = target_node
6654
      self.cfg.Update(instance, self.feedback_fn)
6655
      demoted_node = source_node
6656
    else:
6657
      self.feedback_fn("* instance confirmed to be running on its"
6658
                       " primary node (%s)" % source_node)
6659
      demoted_node = target_node
6660

    
6661
    if instance.disk_template in constants.DTS_INT_MIRROR:
6662
      self._EnsureSecondary(demoted_node)
6663
      try:
6664
        self._WaitUntilSync()
6665
      except errors.OpExecError:
6666
        # we ignore here errors, since if the device is standalone, it
6667
        # won't be able to sync
6668
        pass
6669
      self._GoStandalone()
6670
      self._GoReconnect(False)
6671
      self._WaitUntilSync()
6672

    
6673
    self.feedback_fn("* done")
6674

    
6675
  def _RevertDiskStatus(self):
6676
    """Try to revert the disk status after a failed migration.
6677

6678
    """
6679
    target_node = self.target_node
6680
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6681
      return
6682

    
6683
    try:
6684
      self._EnsureSecondary(target_node)
6685
      self._GoStandalone()
6686
      self._GoReconnect(False)
6687
      self._WaitUntilSync()
6688
    except errors.OpExecError, err:
6689
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6690
                         " drives: error '%s'\n"
6691
                         "Please look and recover the instance status" %
6692
                         str(err))
6693

    
6694
  def _AbortMigration(self):
6695
    """Call the hypervisor code to abort a started migration.
6696

6697
    """
6698
    instance = self.instance
6699
    target_node = self.target_node
6700
    migration_info = self.migration_info
6701

    
6702
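    # Calling finalize_migration with success=False is assumed here to tell
    # the target node to abort and clean up the partially started migration.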
    abort_result = self.rpc.call_finalize_migration(target_node,
6703
                                                    instance,
6704
                                                    migration_info,
6705
                                                    False)
6706
    abort_msg = abort_result.fail_msg
6707
    if abort_msg:
6708
      logging.error("Aborting migration failed on target node %s: %s",
6709
                    target_node, abort_msg)
6710
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
6712

    
6713
  def _ExecMigration(self):
6714
    """Migrate an instance.
6715

6716
    The migrate is done by:
6717
      - change the disks into dual-master mode
6718
      - wait until disks are fully synchronized again
6719
      - migrate the instance
6720
      - change disks on the new secondary node (the old primary) to secondary
6721
      - wait until disks are fully synchronized
6722
      - change disks into single-master mode
6723

6724
    """
6725
    instance = self.instance
6726
    target_node = self.target_node
6727
    source_node = self.source_node
6728

    
6729
    self.feedback_fn("* checking disk consistency between source and target")
6730
    for dev in instance.disks:
6731
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6732
        raise errors.OpExecError("Disk %s is degraded or not fully"
6733
                                 " synchronized on target node,"
6734
                                 " aborting migrate." % dev.iv_name)
6735

    
6736
    # First get the migration information from the remote node
6737
    result = self.rpc.call_migration_info(source_node, instance)
6738
    msg = result.fail_msg
6739
    if msg:
6740
      log_err = ("Failed fetching source migration information from %s: %s" %
6741
                 (source_node, msg))
6742
      logging.error(log_err)
6743
      raise errors.OpExecError(log_err)
6744

    
6745
    self.migration_info = migration_info = result.payload
6746

    
6747
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6748
      # Then switch the disks to master/master mode
6749
      self._EnsureSecondary(target_node)
6750
      self._GoStandalone()
6751
      self._GoReconnect(True)
6752
      self._WaitUntilSync()
6753

    
6754
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6755
    result = self.rpc.call_accept_instance(target_node,
6756
                                           instance,
6757
                                           migration_info,
6758
                                           self.nodes_ip[target_node])
6759

    
6760
    msg = result.fail_msg
6761
    if msg:
6762
      logging.error("Instance pre-migration failed, trying to revert"
6763
                    " disk status: %s", msg)
6764
      self.feedback_fn("Pre-migration failed, aborting")
6765
      self._AbortMigration()
6766
      self._RevertDiskStatus()
6767
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6768
                               (instance.name, msg))
6769

    
6770
    self.feedback_fn("* migrating instance to %s" % target_node)
6771
    result = self.rpc.call_instance_migrate(source_node, instance,
6772
                                            self.nodes_ip[target_node],
6773
                                            self.live)
6774
    msg = result.fail_msg
6775
    if msg:
6776
      logging.error("Instance migration failed, trying to revert"
6777
                    " disk status: %s", msg)
6778
      self.feedback_fn("Migration failed, aborting")
6779
      self._AbortMigration()
6780
      self._RevertDiskStatus()
6781
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6782
                               (instance.name, msg))
6783

    
6784
    instance.primary_node = target_node
6785
    # distribute new instance config to the other nodes
6786
    self.cfg.Update(instance, self.feedback_fn)
6787

    
6788
    result = self.rpc.call_finalize_migration(target_node,
6789
                                              instance,
6790
                                              migration_info,
6791
                                              True)
6792
    msg = result.fail_msg
6793
    if msg:
6794
      logging.error("Instance migration succeeded, but finalization failed:"
6795
                    " %s", msg)
6796
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6797
                               msg)
6798

    
6799
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6800
      self._EnsureSecondary(source_node)
6801
      self._WaitUntilSync()
6802
      self._GoStandalone()
6803
      self._GoReconnect(False)
6804
      self._WaitUntilSync()
6805

    
6806
    self.feedback_fn("* done")
6807

    
6808
  def _ExecFailover(self):
6809
    """Failover an instance.
6810

6811
    The failover is done by shutting it down on its present node and
6812
    starting it on the secondary.
6813

6814
    """
6815
    instance = self.instance
6816
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6817

    
6818
    source_node = instance.primary_node
6819
    target_node = self.target_node
6820

    
6821
    if instance.admin_up:
6822
      self.feedback_fn("* checking disk consistency between source and target")
6823
      for dev in instance.disks:
6824
        # for drbd, these are drbd over lvm
6825
        if not _CheckDiskConsistency(self, dev, target_node, False):
6826
          if not self.ignore_consistency:
6827
            raise errors.OpExecError("Disk %s is degraded on target node,"
6828
                                     " aborting failover." % dev.iv_name)
6829
    else:
6830
      self.feedback_fn("* not checking disk consistency as instance is not"
6831
                       " running")
6832

    
6833
    self.feedback_fn("* shutting down instance on source node")
6834
    logging.info("Shutting down instance %s on node %s",
6835
                 instance.name, source_node)
6836

    
6837
    result = self.rpc.call_instance_shutdown(source_node, instance,
6838
                                             self.shutdown_timeout)
6839
    msg = result.fail_msg
6840
    if msg:
6841
      if self.ignore_consistency or primary_node.offline:
6842
        self.lu.LogWarning("Could not shutdown instance %s on node %s."
6843
                           " Proceeding anyway. Please make sure node"
6844
                           " %s is down. Error details: %s",
6845
                           instance.name, source_node, source_node, msg)
6846
      else:
6847
        raise errors.OpExecError("Could not shutdown instance %s on"
6848
                                 " node %s: %s" %
6849
                                 (instance.name, source_node, msg))
6850

    
6851
    self.feedback_fn("* deactivating the instance's disks on source node")
6852
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6853
      raise errors.OpExecError("Can't shut down the instance's disks.")
6854

    
6855
    instance.primary_node = target_node
6856
    # distribute new instance config to the other nodes
6857
    self.cfg.Update(instance, self.feedback_fn)
6858

    
6859
    # Only start the instance if it's marked as up
6860
    if instance.admin_up:
6861
      self.feedback_fn("* activating the instance's disks on target node")
6862
      logging.info("Starting instance %s on node %s",
6863
                   instance.name, target_node)
6864

    
6865
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6866
                                           ignore_secondaries=True)
6867
      if not disks_ok:
6868
        _ShutdownInstanceDisks(self, instance)
6869
        raise errors.OpExecError("Can't activate the instance's disks")
6870

    
6871
      self.feedback_fn("* starting the instance on the target node")
6872
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6873
      msg = result.fail_msg
6874
      if msg:
6875
        _ShutdownInstanceDisks(self, instance)
6876
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6877
                                 (instance.name, target_node, msg))
6878

    
6879
  def Exec(self, feedback_fn):
6880
    """Perform the migration.
6881

6882
    """
6883
    self.feedback_fn = feedback_fn
6884
    self.source_node = self.instance.primary_node
6885

    
6886
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6887
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
6888
      self.target_node = self.instance.secondary_nodes[0]
6889
      # Otherwise self.target_node has been populated either
6890
      # directly, or through an iallocator.
6891

    
6892
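    # The DRBD reconfiguration and migration RPCs issued below address the
    # nodes by their secondary (replication network) IPs.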
    self.all_nodes = [self.source_node, self.target_node]
6893
    self.nodes_ip = {
6894
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6895
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6896
      }
6897

    
6898
    if self.failover:
6899
      feedback_fn("Failover instance %s" % self.instance.name)
6900
      self._ExecFailover()
6901
    else:
6902
      feedback_fn("Migrating instance %s" % self.instance.name)
6903

    
6904
      if self.cleanup:
6905
        return self._ExecCleanup()
6906
      else:
6907
        return self._ExecMigration()
6908

    
6909

    
6910
def _CreateBlockDev(lu, node, instance, device, force_create,
6911
                    info, force_open):
6912
  """Create a tree of block devices on a given node.
6913

6914
  If this device type has to be created on secondaries, create it and
6915
  all its children.
6916

6917
  If not, just recurse to children keeping the same 'force' value.
6918

6919
  @param lu: the lu on whose behalf we execute
6920
  @param node: the node on which to create the device
6921
  @type instance: L{objects.Instance}
6922
  @param instance: the instance which owns the device
6923
  @type device: L{objects.Disk}
6924
  @param device: the device to create
6925
  @type force_create: boolean
6926
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute
6929
  @param info: the extra 'metadata' we should attach to the device
6930
      (this will be represented as a LVM tag)
6931
  @type force_open: boolean
6932
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
6936

6937
  """
6938
  if device.CreateOnSecondary():
6939
    force_create = True
6940

    
6941
  if device.children:
6942
    for child in device.children:
6943
      _CreateBlockDev(lu, node, instance, child, force_create,
6944
                      info, force_open)
6945

    
6946
  if not force_create:
6947
    return
6948

    
6949
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6950

    
6951

    
6952
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6953
  """Create a single block device on a given node.
6954

6955
  This will not recurse over children of the device, so they must be
6956
  created in advance.
6957

6958
  @param lu: the lu on whose behalf we execute
6959
  @param node: the node on which to create the device
6960
  @type instance: L{objects.Instance}
6961
  @param instance: the instance which owns the device
6962
  @type device: L{objects.Disk}
6963
  @param device: the device to create
6964
  @param info: the extra 'metadata' we should attach to the device
6965
      (this will be represented as a LVM tag)
6966
  @type force_open: boolean
6967
  @param force_open: this parameter will be passes to the
6968
      L{backend.BlockdevCreate} function where it specifies
6969
      whether we run on primary or not, and it affects both
6970
      the child assembly and the device own Open() execution
6971

6972
  """
6973
  lu.cfg.SetDiskID(device, node)
6974
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6975
                                       instance.name, force_open, info)
6976
  result.Raise("Can't create block device %s on"
6977
               " node %s for instance %s" % (device, node, instance.name))
6978
  if device.physical_id is None:
6979
    device.physical_id = result.payload
6980

    
6981

    
6982
def _GenerateUniqueNames(lu, exts):
6983
  """Generate a suitable LV name.
6984

6985
  This will generate a logical volume name for the given instance.
6986

6987
  """
6988
  results = []
6989
  for val in exts:
6990
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6991
    results.append("%s%s" % (new_id, val))
6992
  return results
6993

    
6994

    
6995
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6996
                         p_minor, s_minor):
6997
  """Generate a drbd8 device complete with its children.
6998

6999
  """
7000
  port = lu.cfg.AllocatePort()
7001
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7002
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7003
                          logical_id=(vgname, names[0]))
7004
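  # The second LV holds the DRBD metadata; its size (128, presumably MiB as
  # for the other disk sizes in this module) is fixed and independent of the
  # data volume size.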
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7005
                          logical_id=(vgname, names[1]))
7006
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7007
                          logical_id=(primary, secondary, port,
7008
                                      p_minor, s_minor,
7009
                                      shared_secret),
7010
                          children=[dev_data, dev_meta],
7011
                          iv_name=iv_name)
7012
  return drbd_dev
7013

    
7014

    
7015
def _GenerateDiskTemplate(lu, template_name,
7016
                          instance_name, primary_node,
7017
                          secondary_nodes, disk_info,
7018
                          file_storage_dir, file_driver,
7019
                          base_index, feedback_fn):
7020
  """Generate the entire disk layout for a given template type.
7021

7022
  """
7023
  # TODO: compute space requirements
7024

    
7025
  vgname = lu.cfg.GetVGName()
7026
  disk_count = len(disk_info)
7027
  disks = []
7028
  if template_name == constants.DT_DISKLESS:
7029
    pass
7030
  elif template_name == constants.DT_PLAIN:
7031
    if len(secondary_nodes) != 0:
7032
      raise errors.ProgrammerError("Wrong template configuration")
7033

    
7034
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7035
                                      for i in range(disk_count)])
7036
    for idx, disk in enumerate(disk_info):
7037
      disk_index = idx + base_index
7038
      vg = disk.get(constants.IDISK_VG, vgname)
7039
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7040
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7041
                              size=disk[constants.IDISK_SIZE],
7042
                              logical_id=(vg, names[idx]),
7043
                              iv_name="disk/%d" % disk_index,
7044
                              mode=disk[constants.IDISK_MODE])
7045
      disks.append(disk_dev)
7046
  elif template_name == constants.DT_DRBD8:
7047
    if len(secondary_nodes) != 1:
7048
      raise errors.ProgrammerError("Wrong template configuration")
7049
    remote_node = secondary_nodes[0]
7050
    minors = lu.cfg.AllocateDRBDMinor(
7051
      [primary_node, remote_node] * len(disk_info), instance_name)
7052

    
7053
    names = []
7054
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7055
                                               for i in range(disk_count)]):
7056
      names.append(lv_prefix + "_data")
7057
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
7059
      disk_index = idx + base_index
7060
      vg = disk.get(constants.IDISK_VG, vgname)
7061
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7062
                                      disk[constants.IDISK_SIZE], vg,
7063
                                      names[idx * 2:idx * 2 + 2],
7064
                                      "disk/%d" % disk_index,
7065
                                      minors[idx * 2], minors[idx * 2 + 1])
7066
      disk_dev.mode = disk[constants.IDISK_MODE]
7067
      disks.append(disk_dev)
7068
  elif template_name == constants.DT_FILE:
7069
    if len(secondary_nodes) != 0:
7070
      raise errors.ProgrammerError("Wrong template configuration")
7071

    
7072
    opcodes.RequireFileStorage()
7073

    
7074
    for idx, disk in enumerate(disk_info):
7075
      disk_index = idx + base_index
7076
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7077
                              size=disk[constants.IDISK_SIZE],
7078
                              iv_name="disk/%d" % disk_index,
7079
                              logical_id=(file_driver,
7080
                                          "%s/disk%d" % (file_storage_dir,
7081
                                                         disk_index)),
7082
                              mode=disk[constants.IDISK_MODE])
7083
      disks.append(disk_dev)
7084
  elif template_name == constants.DT_SHARED_FILE:
7085
    if len(secondary_nodes) != 0:
7086
      raise errors.ProgrammerError("Wrong template configuration")
7087

    
7088
    opcodes.RequireSharedFileStorage()
7089

    
7090
    for idx, disk in enumerate(disk_info):
7091
      disk_index = idx + base_index
7092
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7093
                              size=disk[constants.IDISK_SIZE],
7094
                              iv_name="disk/%d" % disk_index,
7095
                              logical_id=(file_driver,
7096
                                          "%s/disk%d" % (file_storage_dir,
7097
                                                         disk_index)),
7098
                              mode=disk[constants.IDISK_MODE])
7099
      disks.append(disk_dev)
7100
  elif template_name == constants.DT_BLOCK:
7101
    if len(secondary_nodes) != 0:
7102
      raise errors.ProgrammerError("Wrong template configuration")
7103

    
7104
    for idx, disk in enumerate(disk_info):
7105
      disk_index = idx + base_index
7106
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7107
                              size=disk[constants.IDISK_SIZE],
7108
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7109
                                          disk[constants.IDISK_ADOPT]),
7110
                              iv_name="disk/%d" % disk_index,
7111
                              mode=disk[constants.IDISK_MODE])
7112
      disks.append(disk_dev)
7113

    
7114
  else:
7115
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7116
  return disks
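# Minimal usage sketch (hypothetical arguments, assuming the cluster default
# VG is "xenvg"): for a single plain 1024 MiB disk,
#   _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
#                         "node1", [], [{constants.IDISK_SIZE: 1024,
#                                        constants.IDISK_MODE:
#                                          constants.DISK_RDWR}],
#                         None, None, 0, feedback_fn)
# returns a one-element list with an LD_LV objects.Disk whose iv_name is
# "disk/0" and whose logical_id is ("xenvg", "<unique id>.disk0").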

    
7118

    
7119
def _GetInstanceInfoText(instance):
7120
  """Compute that text that should be added to the disk's metadata.
7121

7122
  """
7123
  return "originstname+%s" % instance.name
7124

    
7125

    
7126
def _CalcEta(time_taken, written, total_size):
7127
  """Calculates the ETA based on size written and total size.
7128

7129
  @param time_taken: The time taken so far
7130
  @param written: amount written so far
7131
  @param total_size: The total size of data to be written
7132
  @return: The remaining time in seconds
7133

7134
  """
7135
  avg_time = time_taken / float(written)
7136
  return (total_size - written) * avg_time
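# Worked example: with time_taken=30.0, written=2048 and total_size=10240
# (any consistent units), avg_time is 30.0 / 2048 and the result is
# (10240 - 2048) * (30.0 / 2048) == 120.0, i.e. two more minutes at the
# observed rate.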

    
7138

    
7139
def _WipeDisks(lu, instance):
7140
  """Wipes instance disks.
7141

7142
  @type lu: L{LogicalUnit}
7143
  @param lu: the logical unit on whose behalf we execute
7144
  @type instance: L{objects.Instance}
7145
  @param instance: the instance whose disks we should wipe
7146
  @return: the success of the wipe
7147

7148
  """
7149
  node = instance.primary_node
7150

    
7151
  for device in instance.disks:
7152
    lu.cfg.SetDiskID(device, node)
7153

    
7154
  logging.info("Pause sync of instance %s disks", instance.name)
7155
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7156

    
7157
  for idx, success in enumerate(result.payload):
7158
    if not success:
7159
      logging.warn("pause-sync of instance %s for disks %d failed",
7160
                   instance.name, idx)
7161

    
7162
  try:
7163
    for idx, device in enumerate(instance.disks):
7164
      lu.LogInfo("* Wiping disk %d", idx)
7165
      logging.info("Wiping disk %d for instance %s, node %s",
7166
                   idx, instance.name, node)
7167

    
7168
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk,
      # but at most MAX_WIPE_CHUNK
7170
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7171
                            constants.MIN_WIPE_CHUNK_PERCENT)
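      # Worked example (assuming MIN_WIPE_CHUNK_PERCENT == 10 and
      # MAX_WIPE_CHUNK == 1024): a 5000 MiB disk is wiped in 500 MiB
      # chunks, while a 20480 MiB disk is capped at 1024 MiB per chunk.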

    
7173
      offset = 0
7174
      size = device.size
7175
      last_output = 0
7176
      start_time = time.time()
7177

    
7178
      while offset < size:
7179
        wipe_size = min(wipe_chunk_size, size - offset)
7180
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7181
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7182
                     (idx, offset, wipe_size))
7183
        now = time.time()
7184
        offset += wipe_size
7185
        if now - last_output >= 60:
7186
          eta = _CalcEta(now - start_time, offset, size)
7187
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7188
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7189
          last_output = now
7190
  finally:
7191
    logging.info("Resume sync of instance %s disks", instance.name)
7192

    
7193
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks,
                                                    False)
7194

    
7195
    for idx, success in enumerate(result.payload):
7196
      if not success:
7197
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7198
                      " look at the status and troubleshoot the issue.", idx)
7199
        logging.warn("resume-sync of instance %s for disks %d failed",
7200
                     instance.name, idx)
7201

    
7202

    
7203
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7204
  """Create all disks for an instance.
7205

7206
  This abstracts away some work from AddInstance.
7207

7208
  @type lu: L{LogicalUnit}
7209
  @param lu: the logical unit on whose behalf we execute
7210
  @type instance: L{objects.Instance}
7211
  @param instance: the instance whose disks we should create
7212
  @type to_skip: list
7213
  @param to_skip: list of indices to skip
7214
  @type target_node: string
7215
  @param target_node: if passed, overrides the target node for creation
7216
  @rtype: boolean
7217
  @return: the success of the creation
7218

7219
  """
7220
  info = _GetInstanceInfoText(instance)
7221
  if target_node is None:
7222
    pnode = instance.primary_node
7223
    all_nodes = instance.all_nodes
7224
  else:
7225
    pnode = target_node
7226
    all_nodes = [pnode]
7227

    
7228
  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7229
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7230
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7231

    
7232
    result.Raise("Failed to create directory '%s' on"
7233
                 " node %s" % (file_storage_dir, pnode))
7234

    
7235
  # Note: this needs to be kept in sync with adding of disks in
7236
  # LUInstanceSetParams
7237
  for idx, device in enumerate(instance.disks):
7238
    if to_skip and idx in to_skip:
7239
      continue
7240
    logging.info("Creating volume %s for instance %s",
7241
                 device.iv_name, instance.name)
7242
    #HARDCODE
7243
    for node in all_nodes:
7244
      f_create = node == pnode
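      # Note: f_create doubles as both force_create and force_open below, so
      # the device is only force-created/opened when node == pnode; on other
      # nodes creation relies on device.CreateOnSecondary() inside
      # _CreateBlockDev.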
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7246

    
7247

    
7248
def _RemoveDisks(lu, instance, target_node=None):
7249
  """Remove all disks for an instance.
7250

7251
  This abstracts away some work from `AddInstance()` and
7252
  `RemoveInstance()`. Note that in case some of the devices couldn't
7253
  be removed, the removal will continue with the other ones (compare
7254
  with `_CreateDisks()`).
7255

7256
  @type lu: L{LogicalUnit}
7257
  @param lu: the logical unit on whose behalf we execute
7258
  @type instance: L{objects.Instance}
7259
  @param instance: the instance whose disks we should remove
7260
  @type target_node: string
7261
  @param target_node: used to override the node on which to remove the disks
7262
  @rtype: boolean
7263
  @return: the success of the removal
7264

7265
  """
7266
  logging.info("Removing block devices for instance %s", instance.name)
7267

    
7268
  all_result = True
7269
  for device in instance.disks:
7270
    if target_node:
7271
      edata = [(target_node, device)]
7272
    else:
7273
      edata = device.ComputeNodeTree(instance.primary_node)
7274
    for node, disk in edata:
7275
      lu.cfg.SetDiskID(disk, node)
7276
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7277
      if msg:
7278
        lu.LogWarning("Could not remove block device %s on node %s,"
7279
                      " continuing anyway: %s", device.iv_name, node, msg)
7280
        all_result = False
7281

    
7282
  if instance.disk_template == constants.DT_FILE:
7283
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7284
    if target_node:
7285
      tgt = target_node
7286
    else:
7287
      tgt = instance.primary_node
7288
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7289
    if result.fail_msg:
7290
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7291
                    file_storage_dir, instance.primary_node, result.fail_msg)
7292
      all_result = False
7293

    
7294
  return all_result
7295

    
7296

    
7297
def _ComputeDiskSizePerVG(disk_template, disks):
7298
  """Compute disk size requirements in the volume group
7299

7300
  """
7301
  def _compute(disks, payload):
7302
    """Universal algorithm.
7303

7304
    """
7305
    vgs = {}
7306
    for disk in disks:
7307
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
7309

    
7310
    return vgs
7311

    
7312
  # Required free disk space as a function of disk and swap space
7313
  req_size_dict = {
7314
    constants.DT_DISKLESS: {},
7315
    constants.DT_PLAIN: _compute(disks, 0),
7316
    # 128 MB are added for drbd metadata for each disk
7317
    constants.DT_DRBD8: _compute(disks, 128),
7318
    constants.DT_FILE: {},
7319
    constants.DT_SHARED_FILE: {},
7320
  }
7321

    
7322
  if disk_template not in req_size_dict:
7323
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7324
                                 " is unknown" %  disk_template)
7325

    
7326
  return req_size_dict[disk_template]
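# Worked example (hypothetical input): for two DRBD8 disks of 1024 and
# 2048 MiB, both in volume group "xenvg", this returns
#   {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}
# since 128 MiB of DRBD metadata is accounted for per disk.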

    
7328

    
7329
def _ComputeDiskSize(disk_template, disks):
7330
  """Compute disk size requirements in the volume group
7331

7332
  """
7333
  # Required free disk space as a function of disk and swap space
7334
  req_size_dict = {
7335
    constants.DT_DISKLESS: None,
7336
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7337
    # 128 MB are added for drbd metadata for each disk
7338
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7339
    constants.DT_FILE: None,
7340
    constants.DT_SHARED_FILE: 0,
7341
    constants.DT_BLOCK: 0,
7342
  }
7343

    
7344
  if disk_template not in req_size_dict:
7345
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7346
                                 " is unknown" %  disk_template)
7347

    
7348
  return req_size_dict[disk_template]
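# Worked example: for the same two DRBD8 disks of 1024 and 2048 MiB this
# returns (1024 + 128) + (2048 + 128) == 3328; for DT_PLAIN it would be
# 1024 + 2048 == 3072, and for DT_DISKLESS None.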

    
7350

    
7351
def _FilterVmNodes(lu, nodenames):
7352
  """Filters out non-vm_capable nodes from a list.
7353

7354
  @type lu: L{LogicalUnit}
7355
  @param lu: the logical unit for which we check
7356
  @type nodenames: list
7357
  @param nodenames: the list of nodes on which we should check
7358
  @rtype: list
7359
  @return: the list of vm-capable nodes
7360

7361
  """
7362
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
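# Illustrative example (hypothetical node names): if "node3" is the only
# non-vm_capable node in the cluster, then
#   _FilterVmNodes(lu, ["node1", "node2", "node3"])
# returns ["node1", "node2"].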

    
7365

    
7366
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7367
  """Hypervisor parameter validation.
7368

7369
  This function abstract the hypervisor parameter validation to be
7370
  used in both instance create and instance modify.
7371

7372
  @type lu: L{LogicalUnit}
7373
  @param lu: the logical unit for which we check
7374
  @type nodenames: list
7375
  @param nodenames: the list of nodes on which we should check
7376
  @type hvname: string
7377
  @param hvname: the name of the hypervisor we should use
7378
  @type hvparams: dict
7379
  @param hvparams: the parameters which we need to check
7380
  @raise errors.OpPrereqError: if the parameters are not valid
7381

7382
  """
7383
  nodenames = _FilterVmNodes(lu, nodenames)
7384
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7385
                                                  hvname,
7386
                                                  hvparams)
7387
  for node in nodenames:
7388
    info = hvinfo[node]
7389
    if info.offline:
7390
      continue
7391
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7392

    
7393

    
7394
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7395
  """OS parameters validation.
7396

7397
  @type lu: L{LogicalUnit}
7398
  @param lu: the logical unit for which we check
7399
  @type required: boolean
7400
  @param required: whether the validation should fail if the OS is not
7401
      found
7402
  @type nodenames: list
7403
  @param nodenames: the list of nodes on which we should check
7404
  @type osname: string
7405
  @param osname: the name of the hypervisor we should use
7406
  @type osparams: dict
7407
  @param osparams: the parameters which we need to check
7408
  @raise errors.OpPrereqError: if the parameters are not valid
7409

7410
  """
7411
  nodenames = _FilterVmNodes(lu, nodenames)
7412
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7413
                                   [constants.OS_VALIDATE_PARAMETERS],
7414
                                   osparams)
7415
  for node, nres in result.items():
7416
    # we don't check for offline cases since this should be run only
7417
    # against the master node and/or an instance's nodes
7418
    nres.Raise("OS Parameters validation failed on node %s" % node)
7419
    if not nres.payload:
7420
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7421
                 osname, node)
7422

    
7423

    
7424
class LUInstanceCreate(LogicalUnit):
7425
  """Create an instance.
7426

7427
  """
7428
  HPATH = "instance-add"
7429
  HTYPE = constants.HTYPE_INSTANCE
7430
  REQ_BGL = False
7431

    
7432
  def CheckArguments(self):
7433
    """Check arguments.
7434

7435
    """
7436
    # do not require name_check to ease forward/backward compatibility
7437
    # for tools
7438
    if self.op.no_install and self.op.start:
7439
      self.LogInfo("No-installation mode selected, disabling startup")
7440
      self.op.start = False
7441
    # validate/normalize the instance name
7442
    self.op.instance_name = \
7443
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7444

    
7445
    if self.op.ip_check and not self.op.name_check:
7446
      # TODO: make the ip check more flexible and not depend on the name check
7447
      raise errors.OpPrereqError("Cannot do ip check without a name check",
7448
                                 errors.ECODE_INVAL)
7449

    
7450
    # check nics' parameter names
7451
    for nic in self.op.nics:
7452
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7453

    
7454
    # check disks. parameter names and consistent adopt/no-adopt strategy
7455
    has_adopt = has_no_adopt = False
7456
    for disk in self.op.disks:
7457
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7458
      if constants.IDISK_ADOPT in disk:
7459
        has_adopt = True
7460
      else:
7461
        has_no_adopt = True
7462
    if has_adopt and has_no_adopt:
7463
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7464
                                 errors.ECODE_INVAL)
7465
    if has_adopt:
7466
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7467
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7468
                                   " '%s' disk template" %
7469
                                   self.op.disk_template,
7470
                                   errors.ECODE_INVAL)
7471
      if self.op.iallocator is not None:
7472
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7473
                                   " iallocator script", errors.ECODE_INVAL)
7474
      if self.op.mode == constants.INSTANCE_IMPORT:
7475
        raise errors.OpPrereqError("Disk adoption not allowed for"
7476
                                   " instance import", errors.ECODE_INVAL)
7477
    else:
7478
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7479
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7480
                                   " but no 'adopt' parameter given" %
7481
                                   self.op.disk_template,
7482
                                   errors.ECODE_INVAL)
7483

    
7484
    self.adopt_disks = has_adopt
7485

    
7486
    # instance name verification
7487
    if self.op.name_check:
7488
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7489
      self.op.instance_name = self.hostname1.name
7490
      # used in CheckPrereq for ip ping check
7491
      self.check_ip = self.hostname1.ip
7492
    else:
7493
      self.check_ip = None
7494

    
7495
    # file storage checks
7496
    if (self.op.file_driver and
7497
        self.op.file_driver not in constants.FILE_DRIVER):
7498
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7499
                                 self.op.file_driver, errors.ECODE_INVAL)
7500

    
7501
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7502
      raise errors.OpPrereqError("File storage directory path not absolute",
7503
                                 errors.ECODE_INVAL)
7504

    
7505
    ### Node/iallocator related checks
7506
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7507

    
7508
    if self.op.pnode is not None:
7509
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7510
        if self.op.snode is None:
7511
          raise errors.OpPrereqError("The networked disk templates need"
7512
                                     " a mirror node", errors.ECODE_INVAL)
7513
      elif self.op.snode:
7514
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7515
                        " template")
7516
        self.op.snode = None
7517

    
7518
    self._cds = _GetClusterDomainSecret()
7519

    
7520
    if self.op.mode == constants.INSTANCE_IMPORT:
7521
      # On import force_variant must be True, because if we forced it at
7522
      # initial install, our only chance when importing it back is that it
7523
      # works again!
7524
      self.op.force_variant = True
7525

    
7526
      if self.op.no_install:
7527
        self.LogInfo("No-installation mode has no effect during import")
7528

    
7529
    elif self.op.mode == constants.INSTANCE_CREATE:
7530
      if self.op.os_type is None:
7531
        raise errors.OpPrereqError("No guest OS specified",
7532
                                   errors.ECODE_INVAL)
7533
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7534
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7535
                                   " installation" % self.op.os_type,
7536
                                   errors.ECODE_STATE)
7537
      if self.op.disk_template is None:
7538
        raise errors.OpPrereqError("No disk template specified",
7539
                                   errors.ECODE_INVAL)
7540

    
7541
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7542
      # Check handshake to ensure both clusters have the same domain secret
7543
      src_handshake = self.op.source_handshake
7544
      if not src_handshake:
7545
        raise errors.OpPrereqError("Missing source handshake",
7546
                                   errors.ECODE_INVAL)
7547

    
7548
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7549
                                                           src_handshake)
7550
      if errmsg:
7551
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7552
                                   errors.ECODE_INVAL)
7553

    
7554
      # Load and check source CA
7555
      self.source_x509_ca_pem = self.op.source_x509_ca
7556
      if not self.source_x509_ca_pem:
7557
        raise errors.OpPrereqError("Missing source X509 CA",
7558
                                   errors.ECODE_INVAL)
7559

    
7560
      try:
7561
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7562
                                                    self._cds)
7563
      except OpenSSL.crypto.Error, err:
7564
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7565
                                   (err, ), errors.ECODE_INVAL)
7566

    
7567
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7568
      if errcode is not None:
7569
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7570
                                   errors.ECODE_INVAL)
7571

    
7572
      self.source_x509_ca = cert
7573

    
7574
      src_instance_name = self.op.source_instance_name
7575
      if not src_instance_name:
7576
        raise errors.OpPrereqError("Missing source instance name",
7577
                                   errors.ECODE_INVAL)
7578

    
7579
      self.source_instance_name = \
7580
          netutils.GetHostname(name=src_instance_name).name
7581

    
7582
    else:
7583
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7584
                                 self.op.mode, errors.ECODE_INVAL)
7585

    
7586
  def ExpandNames(self):
7587
    """ExpandNames for CreateInstance.
7588

7589
    Figure out the right locks for instance creation.
7590

7591
    """
7592
    self.needed_locks = {}
7593

    
7594
    instance_name = self.op.instance_name
7595
    # this is just a preventive check, but someone might still add this
7596
    # instance in the meantime, and creation will fail at lock-add time
7597
    if instance_name in self.cfg.GetInstanceList():
7598
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7599
                                 instance_name, errors.ECODE_EXISTS)
7600

    
7601
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7602

    
7603
    if self.op.iallocator:
7604
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7605
    else:
7606
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7607
      nodelist = [self.op.pnode]
7608
      if self.op.snode is not None:
7609
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7610
        nodelist.append(self.op.snode)
7611
      self.needed_locks[locking.LEVEL_NODE] = nodelist
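      # e.g. with pnode "node1" and snode "node2" this locks exactly
      # ["node1", "node2"], while the iallocator branch above has to lock
      # ALL_SET because the target nodes are not known yet (illustrative
      # names only).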

    
7613
    # in case of import lock the source node too
7614
    if self.op.mode == constants.INSTANCE_IMPORT:
7615
      src_node = self.op.src_node
7616
      src_path = self.op.src_path
7617

    
7618
      if src_path is None:
7619
        self.op.src_path = src_path = self.op.instance_name
7620

    
7621
      if src_node is None:
7622
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7623
        self.op.src_node = None
7624
        if os.path.isabs(src_path):
7625
          raise errors.OpPrereqError("Importing an instance from an absolute"
7626
                                     " path requires a source node option.",
7627
                                     errors.ECODE_INVAL)
7628
      else:
7629
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7630
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7631
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7632
        if not os.path.isabs(src_path):
7633
          self.op.src_path = src_path = \
7634
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7635

    
7636
  def _RunAllocator(self):
7637
    """Run the allocator based on input opcode.
7638

7639
    """
7640
    nics = [n.ToDict() for n in self.nics]
7641
    ial = IAllocator(self.cfg, self.rpc,
7642
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7643
                     name=self.op.instance_name,
7644
                     disk_template=self.op.disk_template,
7645
                     tags=[],
7646
                     os=self.op.os_type,
7647
                     vcpus=self.be_full[constants.BE_VCPUS],
7648
                     mem_size=self.be_full[constants.BE_MEMORY],
7649
                     disks=self.disks,
7650
                     nics=nics,
7651
                     hypervisor=self.op.hypervisor,
7652
                     )
7653

    
7654
    ial.Run(self.op.iallocator)
7655

    
7656
    if not ial.success:
7657
      raise errors.OpPrereqError("Can't compute nodes using"
7658
                                 " iallocator '%s': %s" %
7659
                                 (self.op.iallocator, ial.info),
7660
                                 errors.ECODE_NORES)
7661
    if len(ial.result) != ial.required_nodes:
7662
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7663
                                 " of nodes (%s), required %s" %
7664
                                 (self.op.iallocator, len(ial.result),
7665
                                  ial.required_nodes), errors.ECODE_FAULT)
7666
    self.op.pnode = ial.result[0]
7667
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7668
                 self.op.instance_name, self.op.iallocator,
7669
                 utils.CommaJoin(ial.result))
7670
    if ial.required_nodes == 2:
7671
      self.op.snode = ial.result[1]
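    # Illustrative result handling (hypothetical node names): for a DRBD8
    # request the allocator typically reports ial.required_nodes == 2 and
    # e.g. ial.result == ["node1.example.com", "node2.example.com"], which
    # the code above maps to self.op.pnode and self.op.snode respectively.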

    
7673
  def BuildHooksEnv(self):
7674
    """Build hooks env.
7675

7676
    This runs on master, primary and secondary nodes of the instance.
7677

7678
    """
7679
    env = {
7680
      "ADD_MODE": self.op.mode,
7681
      }
7682
    if self.op.mode == constants.INSTANCE_IMPORT:
7683
      env["SRC_NODE"] = self.op.src_node
7684
      env["SRC_PATH"] = self.op.src_path
7685
      env["SRC_IMAGES"] = self.src_images
7686

    
7687
    env.update(_BuildInstanceHookEnv(
7688
      name=self.op.instance_name,
7689
      primary_node=self.op.pnode,
7690
      secondary_nodes=self.secondaries,
7691
      status=self.op.start,
7692
      os_type=self.op.os_type,
7693
      memory=self.be_full[constants.BE_MEMORY],
7694
      vcpus=self.be_full[constants.BE_VCPUS],
7695
      nics=_NICListToTuple(self, self.nics),
7696
      disk_template=self.op.disk_template,
7697
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7698
             for d in self.disks],
7699
      bep=self.be_full,
7700
      hvp=self.hv_full,
7701
      hypervisor_name=self.op.hypervisor,
7702
    ))
7703

    
7704
    return env
7705

    
7706
  def BuildHooksNodes(self):
7707
    """Build hooks nodes.
7708

7709
    """
7710
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7711
    return nl, nl
7712

    
7713
  def _ReadExportInfo(self):
7714
    """Reads the export information from disk.
7715

7716
    It will override the opcode source node and path with the actual
7717
    information, if these two were not specified before.
7718

7719
    @return: the export information
7720

7721
    """
7722
    assert self.op.mode == constants.INSTANCE_IMPORT
7723

    
7724
    src_node = self.op.src_node
7725
    src_path = self.op.src_path
7726

    
7727
    if src_node is None:
7728
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7729
      exp_list = self.rpc.call_export_list(locked_nodes)
7730
      found = False
7731
      for node in exp_list:
7732
        if exp_list[node].fail_msg:
7733
          continue
7734
        if src_path in exp_list[node].payload:
7735
          found = True
7736
          self.op.src_node = src_node = node
7737
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7738
                                                       src_path)
7739
          break
7740
      if not found:
7741
        raise errors.OpPrereqError("No export found for relative path %s" %
7742
                                    src_path, errors.ECODE_INVAL)
7743

    
7744
    _CheckNodeOnline(self, src_node)
7745
    result = self.rpc.call_export_info(src_node, src_path)
7746
    result.Raise("No export or invalid export found in dir %s" % src_path)
7747

    
7748
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7749
    if not export_info.has_section(constants.INISECT_EXP):
7750
      raise errors.ProgrammerError("Corrupted export config",
7751
                                   errors.ECODE_ENVIRON)
7752

    
7753
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7754
    if int(ei_version) != constants.EXPORT_VERSION:
7755
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7756
                                 (ei_version, constants.EXPORT_VERSION),
7757
                                 errors.ECODE_ENVIRON)
7758
    return export_info
7759

    
7760
  def _ReadExportParams(self, einfo):
7761
    """Use export parameters as defaults.
7762

7763
    In case the opcode doesn't specify (as in override) some instance
7764
    parameters, then try to use them from the export information, if
7765
    that declares them.
7766

7767
    """
7768
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7769

    
7770
    if self.op.disk_template is None:
7771
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7772
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7773
                                          "disk_template")
7774
      else:
7775
        raise errors.OpPrereqError("No disk template specified and the export"
7776
                                   " is missing the disk_template information",
7777
                                   errors.ECODE_INVAL)
7778

    
7779
    if not self.op.disks:
7780
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7781
        disks = []
7782
        # TODO: import the disk iv_name too
7783
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7784
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7785
          disks.append({constants.IDISK_SIZE: disk_sz})
7786
        self.op.disks = disks
7787
      else:
7788
        raise errors.OpPrereqError("No disk info specified and the export"
7789
                                   " is missing the disk information",
7790
                                   errors.ECODE_INVAL)
7791

    
7792
    if (not self.op.nics and
7793
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7794
      nics = []
7795
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7796
        ndict = {}
7797
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7798
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7799
          ndict[name] = v
7800
        nics.append(ndict)
7801
      self.op.nics = nics
7802

    
7803
    if (self.op.hypervisor is None and
7804
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7805
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7806
    if einfo.has_section(constants.INISECT_HYP):
7807
      # use the export parameters but do not override the ones
7808
      # specified by the user
7809
      for name, value in einfo.items(constants.INISECT_HYP):
7810
        if name not in self.op.hvparams:
7811
          self.op.hvparams[name] = value
7812

    
7813
    if einfo.has_section(constants.INISECT_BEP):
7814
      # use the parameters, without overriding
7815
      for name, value in einfo.items(constants.INISECT_BEP):
7816
        if name not in self.op.beparams:
7817
          self.op.beparams[name] = value
7818
    else:
7819
      # try to read the parameters old style, from the main section
7820
      for name in constants.BES_PARAMETERS:
7821
        if (name not in self.op.beparams and
7822
            einfo.has_option(constants.INISECT_INS, name)):
7823
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7824

    
7825
    if einfo.has_section(constants.INISECT_OSP):
7826
      # use the parameters, without overriding
7827
      for name, value in einfo.items(constants.INISECT_OSP):
7828
        if name not in self.op.osparams:
7829
          self.op.osparams[name] = value
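    # Rough sketch of the export file consulted above (section names as
    # assumed here, option names taken from the code, values illustrative):
    #
    #   [instance]
    #   disk_template = drbd
    #   disk_count = 1
    #   disk0_size = 1024
    #   nic_count = 1
    #   nic0_mac = aa:00:00:dd:ac:fd
    #
    #   [hypervisor]
    #   kernel_path = /boot/vmlinuz-xenU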

    
7831
  def _RevertToDefaults(self, cluster):
7832
    """Revert the instance parameters to the default values.
7833

7834
    """
7835
    # hvparams
7836
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7837
    for name in self.op.hvparams.keys():
7838
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7839
        del self.op.hvparams[name]
7840
    # beparams
7841
    be_defs = cluster.SimpleFillBE({})
7842
    for name in self.op.beparams.keys():
7843
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7844
        del self.op.beparams[name]
7845
    # nic params
7846
    nic_defs = cluster.SimpleFillNIC({})
7847
    for nic in self.op.nics:
7848
      for name in constants.NICS_PARAMETERS:
7849
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7850
          del nic[name]
7851
    # osparams
7852
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7853
    for name in self.op.osparams.keys():
7854
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7855
        del self.op.osparams[name]
7856

    
7857
  def CheckPrereq(self):
7858
    """Check prerequisites.
7859

7860
    """
7861
    if self.op.mode == constants.INSTANCE_IMPORT:
7862
      export_info = self._ReadExportInfo()
7863
      self._ReadExportParams(export_info)
7864

    
7865
    if (not self.cfg.GetVGName() and
7866
        self.op.disk_template not in constants.DTS_NOT_LVM):
7867
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7868
                                 " instances", errors.ECODE_STATE)
7869

    
7870
    if self.op.hypervisor is None:
7871
      self.op.hypervisor = self.cfg.GetHypervisorType()
7872

    
7873
    cluster = self.cfg.GetClusterInfo()
7874
    enabled_hvs = cluster.enabled_hypervisors
7875
    if self.op.hypervisor not in enabled_hvs:
7876
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7877
                                 " cluster (%s)" % (self.op.hypervisor,
7878
                                  ",".join(enabled_hvs)),
7879
                                 errors.ECODE_STATE)
7880

    
7881
    # check hypervisor parameter syntax (locally)
7882
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7883
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7884
                                      self.op.hvparams)
7885
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7886
    hv_type.CheckParameterSyntax(filled_hvp)
7887
    self.hv_full = filled_hvp
7888
    # check that we don't specify global parameters on an instance
7889
    _CheckGlobalHvParams(self.op.hvparams)
7890

    
7891
    # fill and remember the beparams dict
7892
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7893
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7894

    
7895
    # build os parameters
7896
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7897

    
7898
    # now that hvp/bep are in final format, let's reset to defaults,
7899
    # if told to do so
7900
    if self.op.identify_defaults:
7901
      self._RevertToDefaults(cluster)
7902

    
7903
    # NIC buildup
7904
    self.nics = []
7905
    for idx, nic in enumerate(self.op.nics):
7906
      nic_mode_req = nic.get(constants.INIC_MODE, None)
7907
      nic_mode = nic_mode_req
7908
      if nic_mode is None:
7909
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7910

    
7911
      # in routed mode, for the first nic, the default ip is 'auto'
7912
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7913
        default_ip_mode = constants.VALUE_AUTO
7914
      else:
7915
        default_ip_mode = constants.VALUE_NONE
7916

    
7917
      # ip validity checks
7918
      ip = nic.get(constants.INIC_IP, default_ip_mode)
7919
      if ip is None or ip.lower() == constants.VALUE_NONE:
7920
        nic_ip = None
7921
      elif ip.lower() == constants.VALUE_AUTO:
7922
        if not self.op.name_check:
7923
          raise errors.OpPrereqError("IP address set to auto but name checks"
7924
                                     " have been skipped",
7925
                                     errors.ECODE_INVAL)
7926
        nic_ip = self.hostname1.ip
7927
      else:
7928
        if not netutils.IPAddress.IsValid(ip):
7929
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7930
                                     errors.ECODE_INVAL)
7931
        nic_ip = ip
7932

    
7933
      # TODO: check the ip address for uniqueness
7934
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7935
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7936
                                   errors.ECODE_INVAL)
7937

    
7938
      # MAC address verification
7939
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7940
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7941
        mac = utils.NormalizeAndValidateMac(mac)
7942

    
7943
        try:
7944
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7945
        except errors.ReservationError:
7946
          raise errors.OpPrereqError("MAC address %s already in use"
7947
                                     " in cluster" % mac,
7948
                                     errors.ECODE_NOTUNIQUE)
7949

    
7950
      #  Build nic parameters
7951
      link = nic.get(constants.INIC_LINK, None)
7952
      nicparams = {}
7953
      if nic_mode_req:
7954
        nicparams[constants.NIC_MODE] = nic_mode_req
7955
      if link:
7956
        nicparams[constants.NIC_LINK] = link
7957

    
7958
      check_params = cluster.SimpleFillNIC(nicparams)
7959
      objects.NIC.CheckParameterSyntax(check_params)
7960
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7961

    
7962
    # disk checks/pre-build
7963
    default_vg = self.cfg.GetVGName()
7964
    self.disks = []
7965
    for disk in self.op.disks:
7966
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7967
      if mode not in constants.DISK_ACCESS_SET:
7968
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7969
                                   mode, errors.ECODE_INVAL)
7970
      size = disk.get(constants.IDISK_SIZE, None)
7971
      if size is None:
7972
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7973
      try:
7974
        size = int(size)
7975
      except (TypeError, ValueError):
7976
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7977
                                   errors.ECODE_INVAL)
7978
      new_disk = {
7979
        constants.IDISK_SIZE: size,
7980
        constants.IDISK_MODE: mode,
7981
        constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
7982
        }
7983
      if constants.IDISK_ADOPT in disk:
7984
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7985
      self.disks.append(new_disk)
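    # Illustrative normalisation (assuming the cluster default VG is
    # "xenvg"): an opcode disk spec {constants.IDISK_SIZE: 1024} ends up
    # in self.disks as
    #   {constants.IDISK_SIZE: 1024,
    #    constants.IDISK_MODE: constants.DISK_RDWR,
    #    constants.IDISK_VG: "xenvg"}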

    
7987
    if self.op.mode == constants.INSTANCE_IMPORT:
7988

    
7989
      # Check that the new instance doesn't have less disks than the export
7990
      instance_disks = len(self.disks)
7991
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7992
      if instance_disks < export_disks:
7993
        raise errors.OpPrereqError("Not enough disks to import."
7994
                                   " (instance: %d, export: %d)" %
7995
                                   (instance_disks, export_disks),
7996
                                   errors.ECODE_INVAL)
7997

    
7998
      disk_images = []
7999
      for idx in range(export_disks):
8000
        option = 'disk%d_dump' % idx
8001
        if export_info.has_option(constants.INISECT_INS, option):
8002
          # FIXME: are the old os-es, disk sizes, etc. useful?
8003
          export_name = export_info.get(constants.INISECT_INS, option)
8004
          image = utils.PathJoin(self.op.src_path, export_name)
8005
          disk_images.append(image)
8006
        else:
8007
          disk_images.append(False)
8008

    
8009
      self.src_images = disk_images
8010

    
8011
      old_name = export_info.get(constants.INISECT_INS, 'name')
8012
      try:
8013
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8014
      except (TypeError, ValueError), err:
8015
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8016
                                   " an integer: %s" % str(err),
8017
                                   errors.ECODE_STATE)
8018
      if self.op.instance_name == old_name:
8019
        for idx, nic in enumerate(self.nics):
8020
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8021
            nic_mac_ini = 'nic%d_mac' % idx
8022
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8023

    
8024
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8025

    
8026
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8027
    if self.op.ip_check:
8028
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8029
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8030
                                   (self.check_ip, self.op.instance_name),
8031
                                   errors.ECODE_NOTUNIQUE)
8032

    
8033
    #### mac address generation
8034
    # By generating here the mac address both the allocator and the hooks get
8035
    # the real final mac address rather than the 'auto' or 'generate' value.
8036
    # There is a race condition between the generation and the instance object
8037
    # creation, which means that we know the mac is valid now, but we're not
8038
    # sure it will be when we actually add the instance. If things go bad
8039
    # adding the instance will abort because of a duplicate mac, and the
8040
    # creation job will fail.
8041
    for nic in self.nics:
8042
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8043
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8044

    
8045
    #### allocator run
8046

    
8047
    if self.op.iallocator is not None:
8048
      self._RunAllocator()
8049

    
8050
    #### node related checks
8051

    
8052
    # check primary node
8053
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8054
    assert self.pnode is not None, \
8055
      "Cannot retrieve locked node %s" % self.op.pnode
8056
    if pnode.offline:
8057
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8058
                                 pnode.name, errors.ECODE_STATE)
8059
    if pnode.drained:
8060
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8061
                                 pnode.name, errors.ECODE_STATE)
8062
    if not pnode.vm_capable:
8063
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8064
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8065

    
8066
    self.secondaries = []
8067

    
8068
    # mirror node verification
8069
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8070
      if self.op.snode == pnode.name:
8071
        raise errors.OpPrereqError("The secondary node cannot be the"
8072
                                   " primary node.", errors.ECODE_INVAL)
8073
      _CheckNodeOnline(self, self.op.snode)
8074
      _CheckNodeNotDrained(self, self.op.snode)
8075
      _CheckNodeVmCapable(self, self.op.snode)
8076
      self.secondaries.append(self.op.snode)
8077

    
8078
    nodenames = [pnode.name] + self.secondaries
8079

    
8080
    if not self.adopt_disks:
8081
      # Check lv size requirements, if not adopting
8082
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8083
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8084

    
8085
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8086
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8087
                                disk[constants.IDISK_ADOPT])
8088
                     for disk in self.disks])
8089
      if len(all_lvs) != len(self.disks):
8090
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8091
                                   errors.ECODE_INVAL)
8092
      for lv_name in all_lvs:
8093
        try:
8094
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8095
          # to ReserveLV uses the same syntax
8096
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8097
        except errors.ReservationError:
8098
          raise errors.OpPrereqError("LV named %s used by another instance" %
8099
                                     lv_name, errors.ECODE_NOTUNIQUE)
8100

    
8101
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8102
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8103

    
8104
      node_lvs = self.rpc.call_lv_list([pnode.name],
8105
                                       vg_names.payload.keys())[pnode.name]
8106
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8107
      node_lvs = node_lvs.payload
8108

    
8109
      delta = all_lvs.difference(node_lvs.keys())
8110
      if delta:
8111
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8112
                                   utils.CommaJoin(delta),
8113
                                   errors.ECODE_INVAL)
8114
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8115
      if online_lvs:
8116
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8117
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8118
                                   errors.ECODE_STATE)
8119
      # update the size of disk based on what is found
8120
      for dsk in self.disks:
8121
        dsk[constants.IDISK_SIZE] = \
8122
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8123
                                        dsk[constants.IDISK_ADOPT])][0]))
8124

    
8125
    elif self.op.disk_template == constants.DT_BLOCK:
8126
      # Normalize and de-duplicate device paths
8127
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8128
                       for disk in self.disks])
8129
      if len(all_disks) != len(self.disks):
8130
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8131
                                   errors.ECODE_INVAL)
8132
      baddisks = [d for d in all_disks
8133
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8134
      if baddisks:
8135
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8136
                                   " cannot be adopted" %
8137
                                   (", ".join(baddisks),
8138
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8139
                                   errors.ECODE_INVAL)
8140

    
8141
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8142
                                            list(all_disks))[pnode.name]
8143
      node_disks.Raise("Cannot get block device information from node %s" %
8144
                       pnode.name)
8145
      node_disks = node_disks.payload
8146
      delta = all_disks.difference(node_disks.keys())
8147
      if delta:
8148
        raise errors.OpPrereqError("Missing block device(s): %s" %
8149
                                   utils.CommaJoin(delta),
8150
                                   errors.ECODE_INVAL)
8151
      for dsk in self.disks:
8152
        dsk[constants.IDISK_SIZE] = \
8153
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8154

    
8155
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8156

    
8157
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8158
    # check OS parameters (remotely)
8159
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8160

    
8161
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8162

    
8163
    # memory check on primary node
8164
    if self.op.start:
8165
      _CheckNodeFreeMemory(self, self.pnode.name,
8166
                           "creating instance %s" % self.op.instance_name,
8167
                           self.be_full[constants.BE_MEMORY],
8168
                           self.op.hypervisor)
8169

    
8170
    self.dry_run_result = list(nodenames)
8171

    
8172
  def Exec(self, feedback_fn):
8173
    """Create and add the instance to the cluster.
8174

8175
    """
8176
    instance = self.op.instance_name
8177
    pnode_name = self.pnode.name
8178

    
8179
    ht_kind = self.op.hypervisor
8180
    if ht_kind in constants.HTS_REQ_PORT:
8181
      network_port = self.cfg.AllocatePort()
8182
    else:
8183
      network_port = None
8184

    
8185
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8186
      # this is needed because os.path.join does not accept None arguments
8187
      if self.op.file_storage_dir is None:
8188
        string_file_storage_dir = ""
8189
      else:
8190
        string_file_storage_dir = self.op.file_storage_dir
8191

    
8192
      # build the full file storage dir path
8193
      if self.op.disk_template == constants.DT_SHARED_FILE:
8194
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8195
      else:
8196
        get_fsd_fn = self.cfg.GetFileStorageDir
8197

    
8198
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
8199
                                        string_file_storage_dir, instance)
8200
    else:
8201
      file_storage_dir = ""
8202

    
8203
    disks = _GenerateDiskTemplate(self,
8204
                                  self.op.disk_template,
8205
                                  instance, pnode_name,
8206
                                  self.secondaries,
8207
                                  self.disks,
8208
                                  file_storage_dir,
8209
                                  self.op.file_driver,
8210
                                  0,
8211
                                  feedback_fn)
8212

    
8213
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8214
                            primary_node=pnode_name,
8215
                            nics=self.nics, disks=disks,
8216
                            disk_template=self.op.disk_template,
8217
                            admin_up=False,
8218
                            network_port=network_port,
8219
                            beparams=self.op.beparams,
8220
                            hvparams=self.op.hvparams,
8221
                            hypervisor=self.op.hypervisor,
8222
                            osparams=self.op.osparams,
8223
                            )
8224

    
8225
    if self.adopt_disks:
8226
      if self.op.disk_template == constants.DT_PLAIN:
8227
        # rename LVs to the newly-generated names; we need to construct
8228
        # 'fake' LV disks with the old data, plus the new unique_id
8229
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8230
        rename_to = []
8231
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8232
          rename_to.append(t_dsk.logical_id)
8233
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8234
          self.cfg.SetDiskID(t_dsk, pnode_name)
8235
        result = self.rpc.call_blockdev_rename(pnode_name,
8236
                                               zip(tmp_disks, rename_to))
8237
        result.Raise("Failed to rename adoped LVs")
8238
    else:
8239
      feedback_fn("* creating instance disks...")
8240
      try:
8241
        _CreateDisks(self, iobj)
8242
      except errors.OpExecError:
8243
        self.LogWarning("Device creation failed, reverting...")
8244
        try:
8245
          _RemoveDisks(self, iobj)
8246
        finally:
8247
          self.cfg.ReleaseDRBDMinors(instance)
8248
          raise
8249

    
8250
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8251
        feedback_fn("* wiping instance disks...")
8252
        try:
8253
          _WipeDisks(self, iobj)
8254
        except errors.OpExecError:
8255
          self.LogWarning("Device wiping failed, reverting...")
8256
          try:
8257
            _RemoveDisks(self, iobj)
8258
          finally:
8259
            self.cfg.ReleaseDRBDMinors(instance)
8260
            raise
8261

    
8262
    feedback_fn("adding instance %s to cluster config" % instance)
8263

    
8264
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8265

    
8266
    # Declare that we don't want to remove the instance lock anymore, as we've
8267
    # added the instance to the config
8268
    del self.remove_locks[locking.LEVEL_INSTANCE]
8269
    # Unlock all the nodes
8270
    if self.op.mode == constants.INSTANCE_IMPORT:
8271
      nodes_keep = [self.op.src_node]
8272
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8273
                       if node != self.op.src_node]
8274
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8275
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8276
    else:
8277
      self.context.glm.release(locking.LEVEL_NODE)
8278
      del self.acquired_locks[locking.LEVEL_NODE]
8279

    
8280
    if self.op.wait_for_sync:
8281
      disk_abort = not _WaitForSync(self, iobj)
8282
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8283
      # make sure the disks are not degraded (still sync-ing is ok)
8284
      time.sleep(15)
8285
      feedback_fn("* checking mirrors status")
8286
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8287
    else:
8288
      disk_abort = False
8289

    
8290
    if disk_abort:
8291
      _RemoveDisks(self, iobj)
8292
      self.cfg.RemoveInstance(iobj.name)
8293
      # Make sure the instance lock gets removed
8294
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8295
      raise errors.OpExecError("There are some degraded disks for"
8296
                               " this instance")
8297

    
8298
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8299
      if self.op.mode == constants.INSTANCE_CREATE:
8300
        if not self.op.no_install:
8301
          feedback_fn("* running the instance OS create scripts...")
8302
          # FIXME: pass debug option from opcode to backend
8303
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8304
                                                 self.op.debug_level)
8305
          result.Raise("Could not add os for instance %s"
8306
                       " on node %s" % (instance, pnode_name))
8307

    
8308
      elif self.op.mode == constants.INSTANCE_IMPORT:
8309
        feedback_fn("* running the instance OS import scripts...")
8310

    
8311
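        # Build one disk transfer per exported image; empty entries in
        # src_images are skipped and the corresponding disk is not imported.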
        transfers = []
8312

    
8313
        for idx, image in enumerate(self.src_images):
8314
          if not image:
8315
            continue
8316

    
8317
          # FIXME: pass debug option from opcode to backend
8318
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8319
                                             constants.IEIO_FILE, (image, ),
8320
                                             constants.IEIO_SCRIPT,
8321
                                             (iobj.disks[idx], idx),
8322
                                             None)
8323
          transfers.append(dt)
8324

    
8325
        import_result = \
8326
          masterd.instance.TransferInstanceData(self, feedback_fn,
8327
                                                self.op.src_node, pnode_name,
8328
                                                self.pnode.secondary_ip,
8329
                                                iobj, transfers)
8330
        if not compat.all(import_result):
8331
          self.LogWarning("Some disks for instance %s on node %s were not"
8332
                          " imported successfully" % (instance, pnode_name))
8333

    
8334
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8335
        feedback_fn("* preparing remote import...")
8336
        # The source cluster will stop the instance before attempting to make a
8337
        # connection. In some cases stopping an instance can take a long time,
8338
        # hence the shutdown timeout is added to the connection timeout.
8339
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8340
                           self.op.source_shutdown_timeout)
8341
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8342

    
8343
        assert iobj.primary_node == self.pnode.name
8344
        disk_results = \
8345
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8346
                                        self.source_x509_ca,
8347
                                        self._cds, timeouts)
8348
        if not compat.all(disk_results):
8349
          # TODO: Should the instance still be started, even if some disks
8350
          # failed to import (valid for local imports, too)?
8351
          self.LogWarning("Some disks for instance %s on node %s were not"
8352
                          " imported successfully" % (instance, pnode_name))
8353

    
8354
        # Run rename script on newly imported instance
8355
        assert iobj.name == instance
8356
        feedback_fn("Running rename script for %s" % instance)
8357
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8358
                                                   self.source_instance_name,
8359
                                                   self.op.debug_level)
8360
        if result.fail_msg:
8361
          self.LogWarning("Failed to run rename script for %s on node"
8362
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8363

    
8364
      else:
8365
        # also checked in the prereq part
8366
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8367
                                     % self.op.mode)
8368

    
8369
    if self.op.start:
8370
      iobj.admin_up = True
8371
      self.cfg.Update(iobj, feedback_fn)
8372
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8373
      feedback_fn("* starting instance...")
8374
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8375
      result.Raise("Could not start instance")
8376

    
8377
    return list(iobj.all_nodes)
8378

    
8379

    
8380
class LUInstanceConsole(NoHooksLU):
8381
  """Connect to an instance's console.
8382

8383
  This is somewhat special in that it returns the command line that
8384
  you need to run on the master node in order to connect to the
8385
  console.
8386

8387
  """
8388
  REQ_BGL = False
8389

    
8390
  def ExpandNames(self):
8391
    self._ExpandAndLockInstance()
8392

    
8393
  def CheckPrereq(self):
8394
    """Check prerequisites.
8395

8396
    This checks that the instance is in the cluster.
8397

8398
    """
8399
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8400
    assert self.instance is not None, \
8401
      "Cannot retrieve locked instance %s" % self.op.instance_name
8402
    _CheckNodeOnline(self, self.instance.primary_node)
8403

    
8404
  def Exec(self, feedback_fn):
8405
    """Connect to the console of an instance
8406

8407
    """
8408
    instance = self.instance
8409
    node = instance.primary_node
8410

    
8411
    node_insts = self.rpc.call_instance_list([node],
8412
                                             [instance.hypervisor])[node]
8413
    node_insts.Raise("Can't get node information from %s" % node)
8414

    
8415
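    # If the instance is not running on its primary node, distinguish an
    # expected stop (admin down) from an error state in the message below.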
    if instance.name not in node_insts.payload:
8416
      if instance.admin_up:
8417
        state = constants.INSTST_ERRORDOWN
8418
      else:
8419
        state = constants.INSTST_ADMINDOWN
8420
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8421
                               (instance.name, state))
8422

    
8423
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8424

    
8425
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8426

    
8427

    
8428
def _GetInstanceConsole(cluster, instance):
8429
  """Returns console information for an instance.
8430

8431
  @type cluster: L{objects.Cluster}
8432
  @type instance: L{objects.Instance}
8433
  @rtype: dict
8434

8435
  """
8436
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
8437
  # beparams and hvparams are passed separately, to avoid editing the
8438
  # instance and then saving the defaults in the instance itself.
8439
  hvparams = cluster.FillHV(instance)
8440
  beparams = cluster.FillBE(instance)
8441
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8442

    
8443
  assert console.instance == instance.name
8444
  assert console.Validate()
8445

    
8446
  return console.ToDict()
8447

    
8448

    
8449
class LUInstanceReplaceDisks(LogicalUnit):
8450
  """Replace the disks of an instance.
8451

8452
  """
8453
  HPATH = "mirrors-replace"
8454
  HTYPE = constants.HTYPE_INSTANCE
8455
  REQ_BGL = False
8456

    
8457
  def CheckArguments(self):
8458
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8459
                                  self.op.iallocator)
8460

    
8461
  def ExpandNames(self):
8462
    self._ExpandAndLockInstance()
8463

    
8464
    if self.op.iallocator is not None:
8465
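      # The iallocator may pick any node as the new secondary, so all node
      # locks are needed.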
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8466

    
8467
    elif self.op.remote_node is not None:
8468
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8469
      self.op.remote_node = remote_node
8470

    
8471
      # Warning: do not remove the locking of the new secondary here
8472
      # unless DRBD8.AddChildren is changed to work in parallel;
8473
      # currently it doesn't since parallel invocations of
8474
      # FindUnusedMinor will conflict
8475
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8476
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8477

    
8478
    else:
8479
      self.needed_locks[locking.LEVEL_NODE] = []
8480
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8481

    
8482
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8483
                                   self.op.iallocator, self.op.remote_node,
8484
                                   self.op.disks, False, self.op.early_release)
8485

    
8486
    self.tasklets = [self.replacer]
8487

    
8488
  def DeclareLocks(self, level):
8489
    # If we're not already locking all nodes in the set we have to declare the
8490
    # instance's primary/secondary nodes.
8491
    if (level == locking.LEVEL_NODE and
8492
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8493
      self._LockInstancesNodes()
8494

    
8495
  def BuildHooksEnv(self):
8496
    """Build hooks env.
8497

8498
    This runs on the master, the primary and all the secondaries.
8499

8500
    """
8501
    instance = self.replacer.instance
8502
    env = {
8503
      "MODE": self.op.mode,
8504
      "NEW_SECONDARY": self.op.remote_node,
8505
      "OLD_SECONDARY": instance.secondary_nodes[0],
8506
      }
8507
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8508
    return env
8509

    
8510
  def BuildHooksNodes(self):
8511
    """Build hooks nodes.
8512

8513
    """
8514
    instance = self.replacer.instance
8515
    nl = [
8516
      self.cfg.GetMasterNode(),
8517
      instance.primary_node,
8518
      ]
8519
    if self.op.remote_node is not None:
8520
      nl.append(self.op.remote_node)
8521
    return nl, nl
8522

    
8523

    
8524
class TLReplaceDisks(Tasklet):
8525
  """Replaces disks for an instance.
8526

8527
  Note: Locking is not within the scope of this class.
8528

8529
  """
8530
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8531
               disks, delay_iallocator, early_release):
8532
    """Initializes this class.
8533

8534
    """
8535
    Tasklet.__init__(self, lu)
8536

    
8537
    # Parameters
8538
    self.instance_name = instance_name
8539
    self.mode = mode
8540
    self.iallocator_name = iallocator_name
8541
    self.remote_node = remote_node
8542
    self.disks = disks
8543
    self.delay_iallocator = delay_iallocator
8544
    self.early_release = early_release
8545

    
8546
    # Runtime data
8547
    self.instance = None
8548
    self.new_node = None
8549
    self.target_node = None
8550
    self.other_node = None
8551
    self.remote_node_info = None
8552
    self.node_secondary_ip = None
8553

    
8554
  @staticmethod
8555
  def CheckArguments(mode, remote_node, iallocator):
8556
    """Helper function for users of this class.
8557

8558
    """
8559
    # check for valid parameter combination
8560
    if mode == constants.REPLACE_DISK_CHG:
8561
      if remote_node is None and iallocator is None:
8562
        raise errors.OpPrereqError("When changing the secondary either an"
8563
                                   " iallocator script must be used or the"
8564
                                   " new node given", errors.ECODE_INVAL)
8565

    
8566
      if remote_node is not None and iallocator is not None:
8567
        raise errors.OpPrereqError("Give either the iallocator or the new"
8568
                                   " secondary, not both", errors.ECODE_INVAL)
8569

    
8570
    elif remote_node is not None or iallocator is not None:
8571
      # Not replacing the secondary
8572
      raise errors.OpPrereqError("The iallocator and new node options can"
8573
                                 " only be used when changing the"
8574
                                 " secondary node", errors.ECODE_INVAL)
8575

    
8576
  @staticmethod
8577
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8578
    """Compute a new secondary node using an IAllocator.
8579

8580
    """
8581
    ial = IAllocator(lu.cfg, lu.rpc,
8582
                     mode=constants.IALLOCATOR_MODE_RELOC,
8583
                     name=instance_name,
8584
                     relocate_from=relocate_from)
8585

    
8586
    ial.Run(iallocator_name)
8587

    
8588
    if not ial.success:
8589
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8590
                                 " %s" % (iallocator_name, ial.info),
8591
                                 errors.ECODE_NORES)
8592

    
8593
    if len(ial.result) != ial.required_nodes:
8594
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8595
                                 " of nodes (%s), required %s" %
8596
                                 (iallocator_name,
8597
                                  len(ial.result), ial.required_nodes),
8598
                                 errors.ECODE_FAULT)
8599

    
8600
    remote_node_name = ial.result[0]
8601

    
8602
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8603
               instance_name, remote_node_name)
8604

    
8605
    return remote_node_name
8606

    
8607
  def _FindFaultyDisks(self, node_name):
8608
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8609
                                    node_name, True)
8610

    
8611
  def _CheckDisksActivated(self, instance):
8612
    """Checks if the instance disks are activated.
8613

8614
    @param instance: The instance to check disks
8615
    @return: True if they are activated, False otherwise
8616

8617
    """
8618
    nodes = instance.all_nodes
8619

    
8620
    for idx, dev in enumerate(instance.disks):
8621
      for node in nodes:
8622
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8623
        self.cfg.SetDiskID(dev, node)
8624

    
8625
        result = self.rpc.call_blockdev_find(node, dev)
8626

    
8627
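        # An offline node cannot veto the check; only an RPC failure or a
        # missing device means the disks are not activated.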
        if result.offline:
8628
          continue
8629
        elif result.fail_msg or not result.payload:
8630
          return False
8631

    
8632
    return True
8633

    
8634

    
8635
  def CheckPrereq(self):
8636
    """Check prerequisites.
8637

8638
    This checks that the instance is in the cluster.
8639

8640
    """
8641
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8642
    assert instance is not None, \
8643
      "Cannot retrieve locked instance %s" % self.instance_name
8644

    
8645
    if instance.disk_template != constants.DT_DRBD8:
8646
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8647
                                 " instances", errors.ECODE_INVAL)
8648

    
8649
    if len(instance.secondary_nodes) != 1:
8650
      raise errors.OpPrereqError("The instance has a strange layout,"
8651
                                 " expected one secondary but found %d" %
8652
                                 len(instance.secondary_nodes),
8653
                                 errors.ECODE_FAULT)
8654

    
8655
    if not self.delay_iallocator:
8656
      self._CheckPrereq2()
8657

    
8658
  def _CheckPrereq2(self):
8659
    """Check prerequisites, second part.
8660

8661
    This function should always be part of CheckPrereq. It was separated and is
8662
    now called from Exec because during node evacuation iallocator was only
8663
    called with an unmodified cluster model, not taking planned changes into
8664
    account.
8665

8666
    """
8667
    instance = self.instance
8668
    secondary_node = instance.secondary_nodes[0]
8669

    
8670
    if self.iallocator_name is None:
8671
      remote_node = self.remote_node
8672
    else:
8673
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8674
                                       instance.name, instance.secondary_nodes)
8675

    
8676
    if remote_node is not None:
8677
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8678
      assert self.remote_node_info is not None, \
8679
        "Cannot retrieve locked node %s" % remote_node
8680
    else:
8681
      self.remote_node_info = None
8682

    
8683
    if remote_node == self.instance.primary_node:
8684
      raise errors.OpPrereqError("The specified node is the primary node of"
8685
                                 " the instance.", errors.ECODE_INVAL)
8686

    
8687
    if remote_node == secondary_node:
8688
      raise errors.OpPrereqError("The specified node is already the"
8689
                                 " secondary node of the instance.",
8690
                                 errors.ECODE_INVAL)
8691

    
8692
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8693
                                    constants.REPLACE_DISK_CHG):
8694
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8695
                                 errors.ECODE_INVAL)
8696

    
8697
    if self.mode == constants.REPLACE_DISK_AUTO:
8698
      if not self._CheckDisksActivated(instance):
8699
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
8700
                                   " first" % self.instance_name,
8701
                                   errors.ECODE_STATE)
8702
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8703
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8704

    
8705
      if faulty_primary and faulty_secondary:
8706
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8707
                                   " one node and can not be repaired"
8708
                                   " automatically" % self.instance_name,
8709
                                   errors.ECODE_STATE)
8710

    
8711
      if faulty_primary:
8712
        self.disks = faulty_primary
8713
        self.target_node = instance.primary_node
8714
        self.other_node = secondary_node
8715
        check_nodes = [self.target_node, self.other_node]
8716
      elif faulty_secondary:
8717
        self.disks = faulty_secondary
8718
        self.target_node = secondary_node
8719
        self.other_node = instance.primary_node
8720
        check_nodes = [self.target_node, self.other_node]
8721
      else:
8722
        self.disks = []
8723
        check_nodes = []
8724

    
8725
    else:
8726
      # Non-automatic modes
8727
      if self.mode == constants.REPLACE_DISK_PRI:
8728
        self.target_node = instance.primary_node
8729
        self.other_node = secondary_node
8730
        check_nodes = [self.target_node, self.other_node]
8731

    
8732
      elif self.mode == constants.REPLACE_DISK_SEC:
8733
        self.target_node = secondary_node
8734
        self.other_node = instance.primary_node
8735
        check_nodes = [self.target_node, self.other_node]
8736

    
8737
      elif self.mode == constants.REPLACE_DISK_CHG:
8738
        self.new_node = remote_node
8739
        self.other_node = instance.primary_node
8740
        self.target_node = secondary_node
8741
        check_nodes = [self.new_node, self.other_node]
8742

    
8743
        _CheckNodeNotDrained(self.lu, remote_node)
8744
        _CheckNodeVmCapable(self.lu, remote_node)
8745

    
8746
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8747
        assert old_node_info is not None
8748
        if old_node_info.offline and not self.early_release:
8749
          # doesn't make sense to delay the release
8750
          self.early_release = True
8751
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8752
                          " early-release mode", secondary_node)
8753

    
8754
      else:
8755
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8756
                                     self.mode)
8757

    
8758
      # If not specified all disks should be replaced
8759
      if not self.disks:
8760
        self.disks = range(len(self.instance.disks))
8761

    
8762
    for node in check_nodes:
8763
      _CheckNodeOnline(self.lu, node)
8764

    
8765
    # Check whether disks are valid
8766
    for disk_idx in self.disks:
8767
      instance.FindDisk(disk_idx)
8768

    
8769
    # Get secondary node IP addresses
8770
    node_2nd_ip = {}
8771

    
8772
    for node_name in [self.target_node, self.other_node, self.new_node]:
8773
      if node_name is not None:
8774
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8775

    
8776
    self.node_secondary_ip = node_2nd_ip
8777

    
8778
  def Exec(self, feedback_fn):
8779
    """Execute disk replacement.
8780

8781
    This dispatches the disk replacement to the appropriate handler.
8782

8783
    """
8784
    if self.delay_iallocator:
8785
      self._CheckPrereq2()
8786

    
8787
    if not self.disks:
8788
      feedback_fn("No disks need replacement")
8789
      return
8790

    
8791
    feedback_fn("Replacing disk(s) %s for %s" %
8792
                (utils.CommaJoin(self.disks), self.instance.name))
8793

    
8794
    activate_disks = (not self.instance.admin_up)
8795

    
8796
    # Activate the instance disks if we're replacing them on a down instance
8797
    if activate_disks:
8798
      _StartInstanceDisks(self.lu, self.instance, True)
8799

    
8800
    try:
8801
      # Should we replace the secondary node?
8802
      if self.new_node is not None:
8803
        fn = self._ExecDrbd8Secondary
8804
      else:
8805
        fn = self._ExecDrbd8DiskOnly
8806

    
8807
      return fn(feedback_fn)
8808

    
8809
    finally:
8810
      # Deactivate the instance disks if we're replacing them on a
8811
      # down instance
8812
      if activate_disks:
8813
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8814

    
8815
  def _CheckVolumeGroup(self, nodes):
8816
    self.lu.LogInfo("Checking volume groups")
8817

    
8818
    vgname = self.cfg.GetVGName()
8819

    
8820
    # Make sure volume group exists on all involved nodes
8821
    results = self.rpc.call_vg_list(nodes)
8822
    if not results:
8823
      raise errors.OpExecError("Can't list volume groups on the nodes")
8824

    
8825
    for node in nodes:
8826
      res = results[node]
8827
      res.Raise("Error checking node %s" % node)
8828
      if vgname not in res.payload:
8829
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8830
                                 (vgname, node))
8831

    
8832
  def _CheckDisksExistence(self, nodes):
8833
    # Check disk existence
8834
    for idx, dev in enumerate(self.instance.disks):
8835
      if idx not in self.disks:
8836
        continue
8837

    
8838
      for node in nodes:
8839
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8840
        self.cfg.SetDiskID(dev, node)
8841

    
8842
        result = self.rpc.call_blockdev_find(node, dev)
8843

    
8844
        msg = result.fail_msg
8845
        if msg or not result.payload:
8846
          if not msg:
8847
            msg = "disk not found"
8848
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8849
                                   (idx, node, msg))
8850

    
8851
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8852
    for idx, dev in enumerate(self.instance.disks):
8853
      if idx not in self.disks:
8854
        continue
8855

    
8856
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8857
                      (idx, node_name))
8858

    
8859
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8860
                                   ldisk=ldisk):
8861
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8862
                                 " replace disks for instance %s" %
8863
                                 (node_name, self.instance.name))
8864

    
8865
  def _CreateNewStorage(self, node_name):
8866
    vgname = self.cfg.GetVGName()
8867
    iv_names = {}
8868

    
8869
    for idx, dev in enumerate(self.instance.disks):
8870
      if idx not in self.disks:
8871
        continue
8872

    
8873
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8874

    
8875
      self.cfg.SetDiskID(dev, node_name)
8876

    
8877
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8878
      names = _GenerateUniqueNames(self.lu, lv_names)
8879

    
8880
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8881
                             logical_id=(vgname, names[0]))
8882
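      # The second LV holds the DRBD metadata; its size is fixed at 128 MiB.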
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8883
                             logical_id=(vgname, names[1]))
8884

    
8885
      new_lvs = [lv_data, lv_meta]
8886
      old_lvs = dev.children
8887
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8888

    
8889
      # we pass force_create=True to force the LVM creation
8890
      for new_lv in new_lvs:
8891
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8892
                        _GetInstanceInfoText(self.instance), False)
8893

    
8894
    return iv_names
8895

    
8896
  def _CheckDevices(self, node_name, iv_names):
8897
    for name, (dev, _, _) in iv_names.iteritems():
8898
      self.cfg.SetDiskID(dev, node_name)
8899

    
8900
      result = self.rpc.call_blockdev_find(node_name, dev)
8901

    
8902
      msg = result.fail_msg
8903
      if msg or not result.payload:
8904
        if not msg:
8905
          msg = "disk not found"
8906
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8907
                                 (name, msg))
8908

    
8909
      if result.payload.is_degraded:
8910
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8911

    
8912
  def _RemoveOldStorage(self, node_name, iv_names):
8913
    for name, (_, old_lvs, _) in iv_names.iteritems():
8914
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8915

    
8916
      for lv in old_lvs:
8917
        self.cfg.SetDiskID(lv, node_name)
8918

    
8919
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8920
        if msg:
8921
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8922
                             hint="remove unused LVs manually")
8923

    
8924
  def _ReleaseNodeLock(self, node_name):
8925
    """Releases the lock for a given node."""
8926
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8927

    
8928
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8929
    """Replace a disk on the primary or secondary for DRBD 8.
8930

8931
    The algorithm for replace is quite complicated:
8932

8933
      1. for each disk to be replaced:
8934

8935
        1. create new LVs on the target node with unique names
8936
        1. detach old LVs from the drbd device
8937
        1. rename old LVs to name_replaced.<time_t>
8938
        1. rename new LVs to old LVs
8939
        1. attach the new LVs (with the old names now) to the drbd device
8940

8941
      1. wait for sync across all devices
8942

8943
      1. for each modified disk:
8944

8945
        1. remove old LVs (which have the name name_replaced.<time_t>)
8946

8947
    Failures are not very well handled.
8948

8949
    """
8950
    steps_total = 6
8951

    
8952
    # Step: check device activation
8953
    self.lu.LogStep(1, steps_total, "Check device existence")
8954
    self._CheckDisksExistence([self.other_node, self.target_node])
8955
    self._CheckVolumeGroup([self.target_node, self.other_node])
8956

    
8957
    # Step: check other node consistency
8958
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8959
    self._CheckDisksConsistency(self.other_node,
8960
                                self.other_node == self.instance.primary_node,
8961
                                False)
8962

    
8963
    # Step: create new storage
8964
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8965
    iv_names = self._CreateNewStorage(self.target_node)
8966

    
8967
    # Step: for each lv, detach+rename*2+attach
8968
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8969
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8970
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8971

    
8972
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8973
                                                     old_lvs)
8974
      result.Raise("Can't detach drbd from local storage on node"
8975
                   " %s for device %s" % (self.target_node, dev.iv_name))
8976
      #dev.children = []
8977
      #cfg.Update(instance)
8978

    
8979
      # ok, we created the new LVs, so now we know we have the needed
8980
      # storage; as such, we proceed on the target node to rename
8981
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8982
      # using the assumption that logical_id == physical_id (which in
8983
      # turn is the unique_id on that node)
8984

    
8985
      # FIXME(iustin): use a better name for the replaced LVs
8986
      temp_suffix = int(time.time())
8987
      ren_fn = lambda d, suff: (d.physical_id[0],
8988
                                d.physical_id[1] + "_replaced-%s" % suff)
8989

    
8990
      # Build the rename list based on what LVs exist on the node
8991
      rename_old_to_new = []
8992
      for to_ren in old_lvs:
8993
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8994
        if not result.fail_msg and result.payload:
8995
          # device exists
8996
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8997

    
8998
      self.lu.LogInfo("Renaming the old LVs on the target node")
8999
      result = self.rpc.call_blockdev_rename(self.target_node,
9000
                                             rename_old_to_new)
9001
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9002

    
9003
      # Now we rename the new LVs to the old LVs
9004
      self.lu.LogInfo("Renaming the new LVs on the target node")
9005
      rename_new_to_old = [(new, old.physical_id)
9006
                           for old, new in zip(old_lvs, new_lvs)]
9007
      result = self.rpc.call_blockdev_rename(self.target_node,
9008
                                             rename_new_to_old)
9009
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9010

    
9011
      for old, new in zip(old_lvs, new_lvs):
9012
        new.logical_id = old.logical_id
9013
        self.cfg.SetDiskID(new, self.target_node)
9014

    
9015
      for disk in old_lvs:
9016
        disk.logical_id = ren_fn(disk, temp_suffix)
9017
        self.cfg.SetDiskID(disk, self.target_node)
9018

    
9019
      # Now that the new lvs have the old name, we can add them to the device
9020
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9021
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9022
                                                  new_lvs)
9023
      msg = result.fail_msg
9024
      if msg:
9025
        for new_lv in new_lvs:
9026
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9027
                                               new_lv).fail_msg
9028
          if msg2:
9029
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9030
                               hint=("cleanup manually the unused logical"
9031
                                     "volumes"))
9032
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9033

    
9034
      dev.children = new_lvs
9035

    
9036
      self.cfg.Update(self.instance, feedback_fn)
9037

    
9038
    cstep = 5
9039
    if self.early_release:
9040
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9041
      cstep += 1
9042
      self._RemoveOldStorage(self.target_node, iv_names)
9043
      # WARNING: we release both node locks here, do not do other RPCs
9044
      # than WaitForSync to the primary node
9045
      self._ReleaseNodeLock([self.target_node, self.other_node])
9046

    
9047
    # Wait for sync
9048
    # This can fail as the old devices are degraded and _WaitForSync
9049
    # does a combined result over all disks, so we don't check its return value
9050
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9051
    cstep += 1
9052
    _WaitForSync(self.lu, self.instance)
9053

    
9054
    # Check all devices manually
9055
    self._CheckDevices(self.instance.primary_node, iv_names)
9056

    
9057
    # Step: remove old storage
9058
    if not self.early_release:
9059
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9060
      cstep += 1
9061
      self._RemoveOldStorage(self.target_node, iv_names)
9062

    
9063
  def _ExecDrbd8Secondary(self, feedback_fn):
9064
    """Replace the secondary node for DRBD 8.
9065

9066
    The algorithm for replace is quite complicated:
9067
      - for all disks of the instance:
9068
        - create new LVs on the new node with same names
9069
        - shutdown the drbd device on the old secondary
9070
        - disconnect the drbd network on the primary
9071
        - create the drbd device on the new secondary
9072
        - network attach the drbd on the primary, using an artifice:
9073
          the drbd code for Attach() will connect to the network if it
9074
          finds a device which is connected to the correct local disks but
9075
          not network enabled
9076
      - wait for sync across all devices
9077
      - remove all disks from the old secondary
9078

9079
    Failures are not very well handled.
9080

9081
    """
9082
    steps_total = 6
9083

    
9084
    # Step: check device activation
9085
    self.lu.LogStep(1, steps_total, "Check device existence")
9086
    self._CheckDisksExistence([self.instance.primary_node])
9087
    self._CheckVolumeGroup([self.instance.primary_node])
9088

    
9089
    # Step: check other node consistency
9090
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9091
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9092

    
9093
    # Step: create new storage
9094
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9095
    for idx, dev in enumerate(self.instance.disks):
9096
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9097
                      (self.new_node, idx))
9098
      # we pass force_create=True to force LVM creation
9099
      for new_lv in dev.children:
9100
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9101
                        _GetInstanceInfoText(self.instance), False)
9102

    
9103
    # Step 4: drbd minors and drbd setup changes
9104
    # after this, we must manually remove the drbd minors on both the
9105
    # error and the success paths
9106
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9107
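    # Allocate one new DRBD minor on the new secondary node for each
    # instance disk.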
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9108
                                         for dev in self.instance.disks],
9109
                                        self.instance.name)
9110
    logging.debug("Allocated minors %r", minors)
9111

    
9112
    iv_names = {}
9113
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9114
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9115
                      (self.new_node, idx))
9116
      # create new devices on new_node; note that we create two IDs:
9117
      # one without port, so the drbd will be activated without
9118
      # networking information on the new node at this stage, and one
9119
      # with network, for the latter activation in step 4
9120
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9121
      if self.instance.primary_node == o_node1:
9122
        p_minor = o_minor1
9123
      else:
9124
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9125
        p_minor = o_minor2
9126

    
9127
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9128
                      p_minor, new_minor, o_secret)
9129
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9130
                    p_minor, new_minor, o_secret)
9131

    
9132
      iv_names[idx] = (dev, dev.children, new_net_id)
9133
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9134
                    new_net_id)
9135
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9136
                              logical_id=new_alone_id,
9137
                              children=dev.children,
9138
                              size=dev.size)
9139
      try:
9140
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9141
                              _GetInstanceInfoText(self.instance), False)
9142
      except errors.GenericError:
9143
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9144
        raise
9145

    
9146
    # We have new devices, shutdown the drbd on the old secondary
9147
    for idx, dev in enumerate(self.instance.disks):
9148
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9149
      self.cfg.SetDiskID(dev, self.target_node)
9150
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9151
      if msg:
9152
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9153
                           "node: %s" % (idx, msg),
9154
                           hint=("Please cleanup this device manually as"
9155
                                 " soon as possible"))
9156

    
9157
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9158
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9159
                                               self.node_secondary_ip,
9160
                                               self.instance.disks)\
9161
                                              [self.instance.primary_node]
9162

    
9163
    msg = result.fail_msg
9164
    if msg:
9165
      # detaches didn't succeed (unlikely)
9166
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9167
      raise errors.OpExecError("Can't detach the disks from the network on"
9168
                               " old node: %s" % (msg,))
9169

    
9170
    # if we managed to detach at least one, we update all the disks of
9171
    # the instance to point to the new secondary
9172
    self.lu.LogInfo("Updating instance configuration")
9173
    for dev, _, new_logical_id in iv_names.itervalues():
9174
      dev.logical_id = new_logical_id
9175
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9176

    
9177
    self.cfg.Update(self.instance, feedback_fn)
9178

    
9179
    # and now perform the drbd attach
9180
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9181
                    " (standalone => connected)")
9182
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9183
                                            self.new_node],
9184
                                           self.node_secondary_ip,
9185
                                           self.instance.disks,
9186
                                           self.instance.name,
9187
                                           False)
9188
    for to_node, to_result in result.items():
9189
      msg = to_result.fail_msg
9190
      if msg:
9191
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9192
                           to_node, msg,
9193
                           hint=("please do a gnt-instance info to see the"
9194
                                 " status of disks"))
9195
    cstep = 5
9196
    if self.early_release:
9197
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9198
      cstep += 1
9199
      self._RemoveOldStorage(self.target_node, iv_names)
9200
      # WARNING: we release all node locks here, do not do other RPCs
9201
      # than WaitForSync to the primary node
9202
      self._ReleaseNodeLock([self.instance.primary_node,
9203
                             self.target_node,
9204
                             self.new_node])
9205

    
9206
    # Wait for sync
9207
    # This can fail as the old devices are degraded and _WaitForSync
9208
    # does a combined result over all disks, so we don't check its return value
9209
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9210
    cstep += 1
9211
    _WaitForSync(self.lu, self.instance)
9212

    
9213
    # Check all devices manually
9214
    self._CheckDevices(self.instance.primary_node, iv_names)
9215

    
9216
    # Step: remove old storage
9217
    if not self.early_release:
9218
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9219
      self._RemoveOldStorage(self.target_node, iv_names)
9220

    
9221

    
9222
class LURepairNodeStorage(NoHooksLU):
9223
  """Repairs the volume group on a node.
9224

9225
  """
9226
  REQ_BGL = False
9227

    
9228
  def CheckArguments(self):
9229
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9230

    
9231
    storage_type = self.op.storage_type
9232

    
9233
    if (constants.SO_FIX_CONSISTENCY not in
9234
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9235
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9236
                                 " repaired" % storage_type,
9237
                                 errors.ECODE_INVAL)
9238

    
9239
  def ExpandNames(self):
9240
    self.needed_locks = {
9241
      locking.LEVEL_NODE: [self.op.node_name],
9242
      }
9243

    
9244
  def _CheckFaultyDisks(self, instance, node_name):
9245
    """Ensure faulty disks abort the opcode or at least warn."""
9246
    try:
9247
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9248
                                  node_name, True):
9249
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9250
                                   " node '%s'" % (instance.name, node_name),
9251
                                   errors.ECODE_STATE)
9252
    except errors.OpPrereqError, err:
9253
      if self.op.ignore_consistency:
9254
        self.proc.LogWarning(str(err.args[0]))
9255
      else:
9256
        raise
9257

    
9258
  def CheckPrereq(self):
9259
    """Check prerequisites.
9260

9261
    """
9262
    # Check whether any instance on this node has faulty disks
9263
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9264
      if not inst.admin_up:
9265
        continue
9266
      check_nodes = set(inst.all_nodes)
9267
      check_nodes.discard(self.op.node_name)
9268
      for inst_node_name in check_nodes:
9269
        self._CheckFaultyDisks(inst, inst_node_name)
9270

    
9271
  def Exec(self, feedback_fn):
9272
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9273
                (self.op.name, self.op.node_name))
9274

    
9275
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9276
    result = self.rpc.call_storage_execute(self.op.node_name,
9277
                                           self.op.storage_type, st_args,
9278
                                           self.op.name,
9279
                                           constants.SO_FIX_CONSISTENCY)
9280
    result.Raise("Failed to repair storage unit '%s' on %s" %
9281
                 (self.op.name, self.op.node_name))
9282

    
9283

    
9284
class LUNodeEvacStrategy(NoHooksLU):
9285
  """Computes the node evacuation strategy.
9286

9287
  """
9288
  REQ_BGL = False
9289

    
9290
  def CheckArguments(self):
9291
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9292

    
9293
  def ExpandNames(self):
9294
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9295
    self.needed_locks = locks = {}
9296
    if self.op.remote_node is None:
9297
      locks[locking.LEVEL_NODE] = locking.ALL_SET
9298
    else:
9299
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9300
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9301

    
9302
  def Exec(self, feedback_fn):
9303
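    # With an explicit target node every secondary instance of the evacuated
    # nodes is simply paired with that node; otherwise the iallocator is
    # asked for a multi-evacuation solution.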
    if self.op.remote_node is not None:
9304
      instances = []
9305
      for node in self.op.nodes:
9306
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9307
      result = []
9308
      for i in instances:
9309
        if i.primary_node == self.op.remote_node:
9310
          raise errors.OpPrereqError("Node %s is the primary node of"
9311
                                     " instance %s, cannot use it as"
9312
                                     " secondary" %
9313
                                     (self.op.remote_node, i.name),
9314
                                     errors.ECODE_INVAL)
9315
        result.append([i.name, self.op.remote_node])
9316
    else:
9317
      ial = IAllocator(self.cfg, self.rpc,
9318
                       mode=constants.IALLOCATOR_MODE_MEVAC,
9319
                       evac_nodes=self.op.nodes)
9320
      ial.Run(self.op.iallocator, validate=True)
9321
      if not ial.success:
9322
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9323
                                 errors.ECODE_NORES)
9324
      result = ial.result
9325
    return result
9326

    
9327

    
9328
class LUInstanceGrowDisk(LogicalUnit):
9329
  """Grow a disk of an instance.
9330

9331
  """
9332
  HPATH = "disk-grow"
9333
  HTYPE = constants.HTYPE_INSTANCE
9334
  REQ_BGL = False
9335

    
9336
  def ExpandNames(self):
9337
    self._ExpandAndLockInstance()
9338
    self.needed_locks[locking.LEVEL_NODE] = []
9339
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9340

    
9341
  def DeclareLocks(self, level):
9342
    if level == locking.LEVEL_NODE:
9343
      self._LockInstancesNodes()
9344

    
9345
  def BuildHooksEnv(self):
9346
    """Build hooks env.
9347

9348
    This runs on the master, the primary and all the secondaries.
9349

9350
    """
9351
    env = {
9352
      "DISK": self.op.disk,
9353
      "AMOUNT": self.op.amount,
9354
      }
9355
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9356
    return env
9357

    
9358
  def BuildHooksNodes(self):
9359
    """Build hooks nodes.
9360

9361
    """
9362
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9363
    return (nl, nl)
9364

    
9365
  def CheckPrereq(self):
9366
    """Check prerequisites.
9367

9368
    This checks that the instance is in the cluster.
9369

9370
    """
9371
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9372
    assert instance is not None, \
9373
      "Cannot retrieve locked instance %s" % self.op.instance_name
9374
    nodenames = list(instance.all_nodes)
9375
    for node in nodenames:
9376
      _CheckNodeOnline(self, node)
9377

    
9378
    self.instance = instance
9379

    
9380
    if instance.disk_template not in constants.DTS_GROWABLE:
9381
      raise errors.OpPrereqError("Instance's disk layout does not support"
9382
                                 " growing.", errors.ECODE_INVAL)
9383

    
9384
    self.disk = instance.FindDisk(self.op.disk)
9385

    
9386
    if instance.disk_template not in (constants.DT_FILE,
9387
                                      constants.DT_SHARED_FILE):
9388
      # TODO: check the free disk space for file, when that feature will be
9389
      # supported
9390
      _CheckNodesFreeDiskPerVG(self, nodenames,
9391
                               self.disk.ComputeGrowth(self.op.amount))
9392

    
9393
  def Exec(self, feedback_fn):
9394
    """Execute disk grow.
9395

9396
    """
9397
    instance = self.instance
9398
    disk = self.disk
9399

    
9400
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9401
    if not disks_ok:
9402
      raise errors.OpExecError("Cannot activate block device to grow")
9403

    
9404
    for node in instance.all_nodes:
9405
      self.cfg.SetDiskID(disk, node)
9406
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9407
      result.Raise("Grow request failed to node %s" % node)
9408

    
9409
      # TODO: Rewrite code to work properly
9410
      # DRBD goes into sync mode for a short amount of time after executing the
9411
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9412
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9413
      # time is a work-around.
9414
      time.sleep(5)
9415

    
9416
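    # Record the new size in the configuration before (optionally) waiting
    # for the resync to finish.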
    disk.RecordGrow(self.op.amount)
9417
    self.cfg.Update(instance, feedback_fn)
9418
    if self.op.wait_for_sync:
9419
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9420
      if disk_abort:
9421
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9422
                             " status.\nPlease check the instance.")
9423
      if not instance.admin_up:
9424
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9425
    elif not instance.admin_up:
9426
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9427
                           " not supposed to be running because no wait for"
9428
                           " sync mode was requested.")
9429

    
9430

    
9431
class LUInstanceQueryData(NoHooksLU):
9432
  """Query runtime instance data.
9433

9434
  """
9435
  REQ_BGL = False
9436

    
9437
  def ExpandNames(self):
9438
    self.needed_locks = {}
9439

    
9440
    # Use locking if requested or when non-static information is wanted
9441
    if not (self.op.static or self.op.use_locking):
9442
      self.LogWarning("Non-static data requested, locks need to be acquired")
9443
      self.op.use_locking = True
9444

    
9445
    if self.op.instances or not self.op.use_locking:
9446
      # Expand instance names right here
9447
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
9448
    else:
9449
      # Will use acquired locks
9450
      self.wanted_names = None
9451

    
9452
    if self.op.use_locking:
9453
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9454

    
9455
      if self.wanted_names is None:
9456
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9457
      else:
9458
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9459

    
9460
      self.needed_locks[locking.LEVEL_NODE] = []
9461
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9462
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9463

    
9464
  def DeclareLocks(self, level):
9465
    if self.op.use_locking and level == locking.LEVEL_NODE:
9466
      self._LockInstancesNodes()
9467

    
9468
  def CheckPrereq(self):
9469
    """Check prerequisites.
9470

9471
    This only checks the optional instance list against the existing names.
9472

9473
    """
9474
    if self.wanted_names is None:
9475
      assert self.op.use_locking, "Locking was not used"
9476
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9477

    
9478
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9479
                             for name in self.wanted_names]
9480

    
9481
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9482
    """Returns the status of a block device
9483

9484
    """
9485
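    # In static mode, or without a node to query, no live status is gathered.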
    if self.op.static or not node:
9486
      return None
9487

    
9488
    self.cfg.SetDiskID(dev, node)
9489

    
9490
    result = self.rpc.call_blockdev_find(node, dev)
9491
    if result.offline:
9492
      return None
9493

    
9494
    result.Raise("Can't compute disk status for %s" % instance_name)
9495

    
9496
    status = result.payload
9497
    if status is None:
9498
      return None
9499

    
9500
    return (status.dev_path, status.major, status.minor,
9501
            status.sync_percent, status.estimated_time,
9502
            status.is_degraded, status.ldisk_status)
9503

    
9504
  def _ComputeDiskStatus(self, instance, snode, dev):
9505
    """Compute block device status.
9506

9507
    """
9508
    if dev.dev_type in constants.LDS_DRBD:
9509
      # we change the snode then (otherwise we use the one passed in)
9510
      if dev.logical_id[0] == instance.primary_node:
9511
        snode = dev.logical_id[1]
9512
      else:
9513
        snode = dev.logical_id[0]
9514

    
9515
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9516
                                              instance.name, dev)
9517
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9518

    
9519
    if dev.children:
9520
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9521
                      for child in dev.children]
9522
    else:
9523
      dev_children = []
9524

    
9525
    return {
9526
      "iv_name": dev.iv_name,
9527
      "dev_type": dev.dev_type,
9528
      "logical_id": dev.logical_id,
9529
      "physical_id": dev.physical_id,
9530
      "pstatus": dev_pstatus,
9531
      "sstatus": dev_sstatus,
9532
      "children": dev_children,
9533
      "mode": dev.mode,
9534
      "size": dev.size,
9535
      }
9536

    
9537
  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
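  # A sketch of one entry of the mapping returned above; the instance and node
  # names and the values are illustrative assumptions, not real query output:
  #
  #   result["inst1.example.com"] = {
  #     "name": "inst1.example.com",
  #     "config_state": "up",          # from instance.admin_up
  #     "run_state": "up",             # None when static data was requested
  #     "pnode": "node1.example.com",
  #     "disks": [{"iv_name": "disk/0", "size": 10240, "mode": "rw",
  #                "pstatus": (...), "sstatus": (...), "children": []}],
  #     ...                            # remaining keys as built above
  #   }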

    
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    
9605
    if self.op.hvparams:
9606
      _CheckGlobalHvParams(self.op.hvparams)
9607

    
9608
    # Disk validation
9609
    disk_addremove = 0
9610
    for disk_op, disk_dict in self.op.disks:
9611
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9612
      if disk_op == constants.DDM_REMOVE:
9613
        disk_addremove += 1
9614
        continue
9615
      elif disk_op == constants.DDM_ADD:
9616
        disk_addremove += 1
9617
      else:
9618
        if not isinstance(disk_op, int):
9619
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9620
        if not isinstance(disk_dict, dict):
9621
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9622
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9623

    
9624
      if disk_op == constants.DDM_ADD:
9625
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9626
        if mode not in constants.DISK_ACCESS_SET:
9627
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9628
                                     errors.ECODE_INVAL)
9629
        size = disk_dict.get(constants.IDISK_SIZE, None)
9630
        if size is None:
9631
          raise errors.OpPrereqError("Required disk parameter size missing",
9632
                                     errors.ECODE_INVAL)
9633
        try:
9634
          size = int(size)
9635
        except (TypeError, ValueError), err:
9636
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9637
                                     str(err), errors.ECODE_INVAL)
9638
        disk_dict[constants.IDISK_SIZE] = size
9639
      else:
9640
        # modification of disk
9641
        if constants.IDISK_SIZE in disk_dict:
9642
          raise errors.OpPrereqError("Disk size change not possible, use"
9643
                                     " grow-disk", errors.ECODE_INVAL)
9644

    
9645
    if disk_addremove > 1:
9646
      raise errors.OpPrereqError("Only one disk add or remove operation"
9647
                                 " supported at a time", errors.ECODE_INVAL)
9648

    
9649
    if self.op.disks and self.op.disk_template is not None:
9650
      raise errors.OpPrereqError("Disk template conversion and other disk"
9651
                                 " changes not supported at the same time",
9652
                                 errors.ECODE_INVAL)
9653

    
9654
    if (self.op.disk_template and
9655
        self.op.disk_template in constants.DTS_INT_MIRROR and
9656
        self.op.remote_node is None):
9657
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9658
                                 " one requires specifying a secondary node",
9659
                                 errors.ECODE_INVAL)
9660

    
9661
    # NIC validation
9662
    nic_addremove = 0
9663
    for nic_op, nic_dict in self.op.nics:
9664
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9665
      if nic_op == constants.DDM_REMOVE:
9666
        nic_addremove += 1
9667
        continue
9668
      elif nic_op == constants.DDM_ADD:
9669
        nic_addremove += 1
9670
      else:
9671
        if not isinstance(nic_op, int):
9672
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9673
        if not isinstance(nic_dict, dict):
9674
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9675
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9676

    
9677
      # nic_dict should be a dict
9678
      nic_ip = nic_dict.get(constants.INIC_IP, None)
9679
      if nic_ip is not None:
9680
        if nic_ip.lower() == constants.VALUE_NONE:
9681
          nic_dict[constants.INIC_IP] = None
9682
        else:
9683
          if not netutils.IPAddress.IsValid(nic_ip):
9684
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9685
                                       errors.ECODE_INVAL)
9686

    
9687
      nic_bridge = nic_dict.get('bridge', None)
9688
      nic_link = nic_dict.get(constants.INIC_LINK, None)
9689
      if nic_bridge and nic_link:
9690
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9691
                                   " at the same time", errors.ECODE_INVAL)
9692
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9693
        nic_dict['bridge'] = None
9694
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9695
        nic_dict[constants.INIC_LINK] = None
9696

    
9697
      if nic_op == constants.DDM_ADD:
9698
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
9699
        if nic_mac is None:
9700
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9701

    
9702
      if constants.INIC_MAC in nic_dict:
9703
        nic_mac = nic_dict[constants.INIC_MAC]
9704
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9705
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9706

    
9707
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9708
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9709
                                     " modifying an existing nic",
9710
                                     errors.ECODE_INVAL)
9711

    
9712
    if nic_addremove > 1:
9713
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9714
                                 " supported at a time", errors.ECODE_INVAL)
9715

    
9716
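  # For reference, self.op.disks and self.op.nics as validated above are lists
  # of (op, params) pairs; a hypothetical example (not taken from a real
  # opcode) would be:
  #   disks=[(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
  #                               constants.IDISK_MODE: constants.DISK_RDWR})]
  #   nics=[(0, {constants.INIC_IP: "192.0.2.10"})]
  # where an integer op addresses an existing device by index.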
  def ExpandNames(self):
9717
    self._ExpandAndLockInstance()
9718
    self.needed_locks[locking.LEVEL_NODE] = []
9719
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9720

    
9721
  def DeclareLocks(self, level):
9722
    if level == locking.LEVEL_NODE:
9723
      self._LockInstancesNodes()
9724
      if self.op.disk_template and self.op.remote_node:
9725
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9726
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9727

    
9728
  def BuildHooksEnv(self):
9729
    """Build hooks env.
9730

9731
    This runs on the master, primary and secondaries.
9732

9733
    """
9734
    args = dict()
9735
    if constants.BE_MEMORY in self.be_new:
9736
      args['memory'] = self.be_new[constants.BE_MEMORY]
9737
    if constants.BE_VCPUS in self.be_new:
9738
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9739
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9740
    # information at all.
9741
    if self.op.nics:
9742
      args['nics'] = []
9743
      nic_override = dict(self.op.nics)
9744
      for idx, nic in enumerate(self.instance.nics):
9745
        if idx in nic_override:
9746
          this_nic_override = nic_override[idx]
9747
        else:
9748
          this_nic_override = {}
9749
        if constants.INIC_IP in this_nic_override:
9750
          ip = this_nic_override[constants.INIC_IP]
9751
        else:
9752
          ip = nic.ip
9753
        if constants.INIC_MAC in this_nic_override:
9754
          mac = this_nic_override[constants.INIC_MAC]
9755
        else:
9756
          mac = nic.mac
9757
        if idx in self.nic_pnew:
9758
          nicparams = self.nic_pnew[idx]
9759
        else:
9760
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9761
        mode = nicparams[constants.NIC_MODE]
9762
        link = nicparams[constants.NIC_LINK]
9763
        args['nics'].append((ip, mac, mode, link))
9764
      if constants.DDM_ADD in nic_override:
9765
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9766
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9767
        nicparams = self.nic_pnew[constants.DDM_ADD]
9768
        mode = nicparams[constants.NIC_MODE]
9769
        link = nicparams[constants.NIC_LINK]
9770
        args['nics'].append((ip, mac, mode, link))
9771
      elif constants.DDM_REMOVE in nic_override:
9772
        del args['nics'][-1]
9773

    
9774
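    # Each entry appended to args['nics'] above is an (ip, mac, mode, link)
    # tuple, e.g. the purely illustrative
    # ("192.0.2.10", "aa:00:00:11:22:33", "bridged", "xen-br0").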
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9775
    if self.op.disk_template:
9776
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9777

    
9778
    return env
9779

    
9780
  def BuildHooksNodes(self):
9781
    """Build hooks nodes.
9782

9783
    """
9784
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9785
    return (nl, nl)
9786

    
9787
  def CheckPrereq(self):
9788
    """Check prerequisites.
9789

9790
    This only checks the instance list against the existing names.
9791

9792
    """
9793
    # checking the new params on the primary/secondary nodes
9794

    
9795
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9796
    cluster = self.cluster = self.cfg.GetClusterInfo()
9797
    assert self.instance is not None, \
9798
      "Cannot retrieve locked instance %s" % self.op.instance_name
9799
    pnode = instance.primary_node
9800
    nodelist = list(instance.all_nodes)
9801

    
9802
    # OS change
9803
    if self.op.os_name and not self.op.force:
9804
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9805
                      self.op.force_variant)
9806
      instance_os = self.op.os_name
9807
    else:
9808
      instance_os = instance.os
9809

    
9810
    if self.op.disk_template:
9811
      if instance.disk_template == self.op.disk_template:
9812
        raise errors.OpPrereqError("Instance already has disk template %s" %
9813
                                   instance.disk_template, errors.ECODE_INVAL)
9814

    
9815
      if (instance.disk_template,
9816
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9817
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9818
                                   " %s to %s" % (instance.disk_template,
9819
                                                  self.op.disk_template),
9820
                                   errors.ECODE_INVAL)
9821
      _CheckInstanceDown(self, instance, "cannot change disk template")
9822
      if self.op.disk_template in constants.DTS_INT_MIRROR:
9823
        if self.op.remote_node == pnode:
9824
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9825
                                     " as the primary node of the instance" %
9826
                                     self.op.remote_node, errors.ECODE_STATE)
9827
        _CheckNodeOnline(self, self.op.remote_node)
9828
        _CheckNodeNotDrained(self, self.op.remote_node)
9829
        # FIXME: here we assume that the old instance type is DT_PLAIN
9830
        assert instance.disk_template == constants.DT_PLAIN
9831
        disks = [{constants.IDISK_SIZE: d.size,
9832
                  constants.IDISK_VG: d.logical_id[0]}
9833
                 for d in instance.disks]
9834
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9835
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9836

    
9837
    # hvparams processing
9838
    if self.op.hvparams:
9839
      hv_type = instance.hypervisor
9840
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9841
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9842
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9843

    
9844
      # local check
9845
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9846
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9847
      self.hv_new = hv_new # the new actual values
9848
      self.hv_inst = i_hvdict # the new dict (without defaults)
9849
    else:
9850
      self.hv_new = self.hv_inst = {}
9851

    
9852
    # beparams processing
9853
    if self.op.beparams:
9854
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9855
                                   use_none=True)
9856
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9857
      be_new = cluster.SimpleFillBE(i_bedict)
9858
      self.be_new = be_new # the new actual values
9859
      self.be_inst = i_bedict # the new dict (without defaults)
9860
    else:
9861
      self.be_new = self.be_inst = {}
9862

    
9863
    # osparams processing
9864
    if self.op.osparams:
9865
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9866
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9867
      self.os_inst = i_osdict # the new dict (without defaults)
9868
    else:
9869
      self.os_inst = {}
9870

    
9871
    self.warn = []
9872

    
9873
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9874
      mem_check_list = [pnode]
9875
      if be_new[constants.BE_AUTO_BALANCE]:
9876
        # either we changed auto_balance to yes or it was from before
9877
        mem_check_list.extend(instance.secondary_nodes)
9878
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9879
                                                  instance.hypervisor)
9880
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9881
                                         instance.hypervisor)
9882
      pninfo = nodeinfo[pnode]
9883
      msg = pninfo.fail_msg
9884
      if msg:
9885
        # Assume the primary node is unreachable and go ahead
9886
        self.warn.append("Can't get info from primary node %s: %s" %
9887
                         (pnode, msg))
9888
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9889
        self.warn.append("Node data from primary node %s doesn't contain"
9890
                         " free memory information" % pnode)
9891
      elif instance_info.fail_msg:
9892
        self.warn.append("Can't get instance runtime information: %s" %
9893
                        instance_info.fail_msg)
9894
      else:
9895
        if instance_info.payload:
9896
          current_mem = int(instance_info.payload['memory'])
9897
        else:
9898
          # Assume instance not running
9899
          # (there is a slight race condition here, but it's not very probable,
9900
          # and we have no other way to check)
9901
          current_mem = 0
9902
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9903
                    pninfo.payload['memory_free'])
9904
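        # Worked example with made-up numbers: raising BE_MEMORY to 4096 MB
        # while the instance currently uses 1024 MB and the primary node
        # reports 2048 MB free gives miss_mem = 4096 - 1024 - 2048 = 1024,
        # which is > 0, so the change is refused below.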
        if miss_mem > 0:
9905
          raise errors.OpPrereqError("This change will prevent the instance"
9906
                                     " from starting, due to %d MB of memory"
9907
                                     " missing on its primary node" % miss_mem,
9908
                                     errors.ECODE_NORES)
9909

    
9910
      if be_new[constants.BE_AUTO_BALANCE]:
9911
        for node, nres in nodeinfo.items():
9912
          if node not in instance.secondary_nodes:
9913
            continue
9914
          msg = nres.fail_msg
9915
          if msg:
9916
            self.warn.append("Can't get info from secondary node %s: %s" %
9917
                             (node, msg))
9918
          elif not isinstance(nres.payload.get('memory_free', None), int):
9919
            self.warn.append("Secondary node %s didn't return free"
9920
                             " memory information" % node)
9921
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9922
            self.warn.append("Not enough memory to failover instance to"
9923
                             " secondary node %s" % node)
9924

    
9925
    # NIC processing
9926
    self.nic_pnew = {}
9927
    self.nic_pinst = {}
9928
    for nic_op, nic_dict in self.op.nics:
9929
      if nic_op == constants.DDM_REMOVE:
9930
        if not instance.nics:
9931
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9932
                                     errors.ECODE_INVAL)
9933
        continue
9934
      if nic_op != constants.DDM_ADD:
9935
        # an existing nic
9936
        if not instance.nics:
9937
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9938
                                     " no NICs" % nic_op,
9939
                                     errors.ECODE_INVAL)
9940
        if nic_op < 0 or nic_op >= len(instance.nics):
9941
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9942
                                     " are 0 to %d" %
9943
                                     (nic_op, len(instance.nics) - 1),
9944
                                     errors.ECODE_INVAL)
9945
        old_nic_params = instance.nics[nic_op].nicparams
9946
        old_nic_ip = instance.nics[nic_op].ip
9947
      else:
9948
        old_nic_params = {}
9949
        old_nic_ip = None
9950

    
9951
      update_params_dict = dict([(key, nic_dict[key])
9952
                                 for key in constants.NICS_PARAMETERS
9953
                                 if key in nic_dict])
9954

    
9955
      if 'bridge' in nic_dict:
9956
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9957

    
9958
      new_nic_params = _GetUpdatedParams(old_nic_params,
9959
                                         update_params_dict)
9960
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9961
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9962
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9963
      self.nic_pinst[nic_op] = new_nic_params
9964
      self.nic_pnew[nic_op] = new_filled_nic_params
9965
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9966

    
9967
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9968
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9969
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9970
        if msg:
9971
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9972
          if self.op.force:
9973
            self.warn.append(msg)
9974
          else:
9975
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9976
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9977
        if constants.INIC_IP in nic_dict:
9978
          nic_ip = nic_dict[constants.INIC_IP]
9979
        else:
9980
          nic_ip = old_nic_ip
9981
        if nic_ip is None:
9982
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9983
                                     ' on a routed nic', errors.ECODE_INVAL)
9984
      if constants.INIC_MAC in nic_dict:
9985
        nic_mac = nic_dict[constants.INIC_MAC]
9986
        if nic_mac is None:
9987
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9988
                                     errors.ECODE_INVAL)
9989
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9990
          # otherwise generate the mac
9991
          nic_dict[constants.INIC_MAC] = \
9992
            self.cfg.GenerateMAC(self.proc.GetECId())
9993
        else:
9994
          # or validate/reserve the current one
9995
          try:
9996
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9997
          except errors.ReservationError:
9998
            raise errors.OpPrereqError("MAC address %s already in use"
9999
                                       " in cluster" % nic_mac,
10000
                                       errors.ECODE_NOTUNIQUE)
10001

    
10002
    # DISK processing
10003
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10004
      raise errors.OpPrereqError("Disk operations not supported for"
10005
                                 " diskless instances",
10006
                                 errors.ECODE_INVAL)
10007
    for disk_op, _ in self.op.disks:
10008
      if disk_op == constants.DDM_REMOVE:
10009
        if len(instance.disks) == 1:
10010
          raise errors.OpPrereqError("Cannot remove the last disk of"
10011
                                     " an instance", errors.ECODE_INVAL)
10012
        _CheckInstanceDown(self, instance, "cannot remove disks")
10013

    
10014
      if (disk_op == constants.DDM_ADD and
10015
          len(instance.disks) >= constants.MAX_DISKS):
10016
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10017
                                   " add more" % constants.MAX_DISKS,
10018
                                   errors.ECODE_STATE)
10019
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10020
        # an existing disk
10021
        if disk_op < 0 or disk_op >= len(instance.disks):
10022
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10023
                                     " are 0 to %d" %
10024
                                     (disk_op, len(instance.disks) - 1),
10025
                                     errors.ECODE_INVAL)
10026

    
10027
    return
10028

    
10029
  def _ConvertPlainToDrbd(self, feedback_fn):
10030
    """Converts an instance from plain to drbd.
10031

10032
    """
10033
    feedback_fn("Converting template to drbd")
10034
    instance = self.instance
10035
    pnode = instance.primary_node
10036
    snode = self.op.remote_node
10037

    
10038
    # create a fake disk info for _GenerateDiskTemplate
10039
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode}
10040
                 for d in instance.disks]
10041
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10042
                                      instance.name, pnode, [snode],
10043
                                      disk_info, None, None, 0, feedback_fn)
10044
    info = _GetInstanceInfoText(instance)
10045
    feedback_fn("Creating aditional volumes...")
10046
    # first, create the missing data and meta devices
10047
    for disk in new_disks:
10048
      # unfortunately this is... not too nice
10049
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10050
                            info, True)
10051
      for child in disk.children:
10052
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10053
    # at this stage, all new LVs have been created, we can rename the
10054
    # old ones
10055
    feedback_fn("Renaming original volumes...")
10056
    rename_list = [(o, n.children[0].logical_id)
10057
                   for (o, n) in zip(instance.disks, new_disks)]
10058
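    # Each pair renames an existing plain LV to the logical_id reserved for
    # the data child (children[0]) of the corresponding new DRBD disk, so the
    # existing data is reused rather than copied.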
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10059
    result.Raise("Failed to rename original LVs")
10060

    
10061
    feedback_fn("Initializing DRBD devices...")
10062
    # all child devices are in place, we can now create the DRBD devices
10063
    for disk in new_disks:
10064
      for node in [pnode, snode]:
10065
        f_create = node == pnode
10066
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10067

    
10068
    # at this point, the instance has been modified
10069
    instance.disk_template = constants.DT_DRBD8
10070
    instance.disks = new_disks
10071
    self.cfg.Update(instance, feedback_fn)
10072

    
10073
    # disks are created, waiting for sync
10074
    disk_abort = not _WaitForSync(self, instance)
10075
    if disk_abort:
10076
      raise errors.OpExecError("There are some degraded disks for"
10077
                               " this instance, please cleanup manually")
10078

    
10079
  def _ConvertDrbdToPlain(self, feedback_fn):
10080
    """Converts an instance from drbd to plain.
10081

10082
    """
10083
    instance = self.instance
10084
    assert len(instance.secondary_nodes) == 1
10085
    pnode = instance.primary_node
10086
    snode = instance.secondary_nodes[0]
10087
    feedback_fn("Converting template to plain")
10088

    
10089
    old_disks = instance.disks
10090
    new_disks = [d.children[0] for d in old_disks]
10091

    
10092
    # copy over size and mode
10093
    for parent, child in zip(old_disks, new_disks):
10094
      child.size = parent.size
10095
      child.mode = parent.mode
10096

    
10097
    # update instance structure
10098
    instance.disks = new_disks
10099
    instance.disk_template = constants.DT_PLAIN
10100
    self.cfg.Update(instance, feedback_fn)
10101

    
10102
    feedback_fn("Removing volumes on the secondary node...")
10103
    for disk in old_disks:
10104
      self.cfg.SetDiskID(disk, snode)
10105
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10106
      if msg:
10107
        self.LogWarning("Could not remove block device %s on node %s,"
10108
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10109

    
10110
    feedback_fn("Removing unneeded volumes on the primary node...")
10111
    for idx, disk in enumerate(old_disks):
10112
      meta = disk.children[1]
10113
      self.cfg.SetDiskID(meta, pnode)
10114
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10115
      if msg:
10116
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10117
                        " continuing anyway: %s", idx, pnode, msg)
10118

    
10119
  def Exec(self, feedback_fn):
10120
    """Modifies an instance.
10121

10122
    All parameters take effect only at the next restart of the instance.
10123

10124
    """
10125
    # Process here the warnings from CheckPrereq, as we don't have a
10126
    # feedback_fn there.
10127
    for warn in self.warn:
10128
      feedback_fn("WARNING: %s" % warn)
10129

    
10130
    result = []
10131
    instance = self.instance
10132
    # disk changes
10133
    for disk_op, disk_dict in self.op.disks:
10134
      if disk_op == constants.DDM_REMOVE:
10135
        # remove the last disk
10136
        device = instance.disks.pop()
10137
        device_idx = len(instance.disks)
10138
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10139
          self.cfg.SetDiskID(disk, node)
10140
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10141
          if msg:
10142
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10143
                            " continuing anyway", device_idx, node, msg)
10144
        result.append(("disk/%d" % device_idx, "remove"))
10145
      elif disk_op == constants.DDM_ADD:
10146
        # add a new disk
10147
        if instance.disk_template in (constants.DT_FILE,
10148
                                        constants.DT_SHARED_FILE):
10149
          file_driver, file_path = instance.disks[0].logical_id
10150
          file_path = os.path.dirname(file_path)
10151
        else:
10152
          file_driver = file_path = None
10153
        disk_idx_base = len(instance.disks)
10154
        new_disk = _GenerateDiskTemplate(self,
10155
                                         instance.disk_template,
10156
                                         instance.name, instance.primary_node,
10157
                                         instance.secondary_nodes,
10158
                                         [disk_dict],
10159
                                         file_path,
10160
                                         file_driver,
10161
                                         disk_idx_base, feedback_fn)[0]
10162
        instance.disks.append(new_disk)
10163
        info = _GetInstanceInfoText(instance)
10164

    
10165
        logging.info("Creating volume %s for instance %s",
10166
                     new_disk.iv_name, instance.name)
10167
        # Note: this needs to be kept in sync with _CreateDisks
10168
        #HARDCODE
10169
        for node in instance.all_nodes:
10170
          f_create = node == instance.primary_node
10171
          try:
10172
            _CreateBlockDev(self, node, instance, new_disk,
10173
                            f_create, info, f_create)
10174
          except errors.OpExecError, err:
10175
            self.LogWarning("Failed to create volume %s (%s) on"
10176
                            " node %s: %s",
10177
                            new_disk.iv_name, new_disk, node, err)
10178
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10179
                       (new_disk.size, new_disk.mode)))
10180
      else:
10181
        # change a given disk
10182
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10183
        result.append(("disk.mode/%d" % disk_op,
10184
                       disk_dict[constants.IDISK_MODE]))
10185

    
10186
    if self.op.disk_template:
10187
      r_shut = _ShutdownInstanceDisks(self, instance)
10188
      if not r_shut:
10189
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10190
                                 " proceed with disk template conversion")
10191
      mode = (instance.disk_template, self.op.disk_template)
10192
      try:
10193
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10194
      except:
10195
        self.cfg.ReleaseDRBDMinors(instance.name)
10196
        raise
10197
      result.append(("disk_template", self.op.disk_template))
10198

    
10199
    # NIC changes
10200
    for nic_op, nic_dict in self.op.nics:
10201
      if nic_op == constants.DDM_REMOVE:
10202
        # remove the last nic
10203
        del instance.nics[-1]
10204
        result.append(("nic.%d" % len(instance.nics), "remove"))
10205
      elif nic_op == constants.DDM_ADD:
10206
        # mac and bridge should be set, by now
10207
        mac = nic_dict[constants.INIC_MAC]
10208
        ip = nic_dict.get(constants.INIC_IP, None)
10209
        nicparams = self.nic_pinst[constants.DDM_ADD]
10210
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10211
        instance.nics.append(new_nic)
10212
        result.append(("nic.%d" % (len(instance.nics) - 1),
10213
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10214
                       (new_nic.mac, new_nic.ip,
10215
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10216
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10217
                       )))
10218
      else:
10219
        for key in (constants.INIC_MAC, constants.INIC_IP):
10220
          if key in nic_dict:
10221
            setattr(instance.nics[nic_op], key, nic_dict[key])
10222
        if nic_op in self.nic_pinst:
10223
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10224
        for key, val in nic_dict.iteritems():
10225
          result.append(("nic.%s/%d" % (key, nic_op), val))
10226

    
10227
    # hvparams changes
10228
    if self.op.hvparams:
10229
      instance.hvparams = self.hv_inst
10230
      for key, val in self.op.hvparams.iteritems():
10231
        result.append(("hv/%s" % key, val))
10232

    
10233
    # beparams changes
10234
    if self.op.beparams:
10235
      instance.beparams = self.be_inst
10236
      for key, val in self.op.beparams.iteritems():
10237
        result.append(("be/%s" % key, val))
10238

    
10239
    # OS change
10240
    if self.op.os_name:
10241
      instance.os = self.op.os_name
10242

    
10243
    # osparams changes
10244
    if self.op.osparams:
10245
      instance.osparams = self.os_inst
10246
      for key, val in self.op.osparams.iteritems():
10247
        result.append(("os/%s" % key, val))
10248

    
10249
    self.cfg.Update(instance, feedback_fn)
10250

    
10251
    return result
10252

    
10253
  _DISK_CONVERSIONS = {
10254
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10255
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10256
    }
10257
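  # The conversion helpers are stored unbound in the class-level dict above,
  # which is why Exec() calls them as self._DISK_CONVERSIONS[mode](self,
  # feedback_fn), passing the LU instance explicitly; only the plain<->drbd
  # conversions listed here are accepted by CheckPrereq.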

    
10258

    
10259
class LUBackupQuery(NoHooksLU):
10260
  """Query the exports list
10261

10262
  """
10263
  REQ_BGL = False
10264

    
10265
  def ExpandNames(self):
10266
    self.needed_locks = {}
10267
    self.share_locks[locking.LEVEL_NODE] = 1
10268
    if not self.op.nodes:
10269
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10270
    else:
10271
      self.needed_locks[locking.LEVEL_NODE] = \
10272
        _GetWantedNodes(self, self.op.nodes)
10273

    
10274
  def Exec(self, feedback_fn):
10275
    """Compute the list of all the exported system images.
10276

10277
    @rtype: dict
10278
    @return: a dictionary with the structure node->(export-list)
10279
        where export-list is a list of the instances exported on
10280
        that node.
10281

10282
    """
10283
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10284
    rpcresult = self.rpc.call_export_list(self.nodes)
10285
    result = {}
10286
    for node in rpcresult:
10287
      if rpcresult[node].fail_msg:
10288
        result[node] = False
10289
      else:
10290
        result[node] = rpcresult[node].payload
10291

    
10292
    return result
10293
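    # Shape of the mapping returned above, with illustrative names:
    #   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
    #    "node2.example.com": False}   # False means the export query failed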

    
10294

    
10295
class LUBackupPrepare(NoHooksLU):
10296
  """Prepares an instance for an export and returns useful information.
10297

10298
  """
10299
  REQ_BGL = False
10300

    
10301
  def ExpandNames(self):
10302
    self._ExpandAndLockInstance()
10303

    
10304
  def CheckPrereq(self):
10305
    """Check prerequisites.
10306

10307
    """
10308
    instance_name = self.op.instance_name
10309

    
10310
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10311
    assert self.instance is not None, \
10312
          "Cannot retrieve locked instance %s" % self.op.instance_name
10313
    _CheckNodeOnline(self, self.instance.primary_node)
10314

    
10315
    self._cds = _GetClusterDomainSecret()
10316

    
10317
  def Exec(self, feedback_fn):
10318
    """Prepares an instance for an export.
10319

10320
    """
10321
    instance = self.instance
10322

    
10323
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10324
      salt = utils.GenerateSecret(8)
10325

    
10326
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10327
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10328
                                              constants.RIE_CERT_VALIDITY)
10329
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10330

    
10331
      (name, cert_pem) = result.payload
10332

    
10333
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10334
                                             cert_pem)
10335

    
10336
      return {
10337
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10338
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10339
                          salt),
10340
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10341
        }
10342

    
10343
    return None
10344
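    # For EXPORT_MODE_REMOTE the dictionary returned above carries everything
    # the importing side needs: the handshake token, the (name, HMAC, salt)
    # triple identifying the X509 key, and the signed CA certificate; local
    # exports need no preparation, hence the None fall-through.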

    
10345

    
10346
class LUBackupExport(LogicalUnit):
10347
  """Export an instance to an image in the cluster.
10348

10349
  """
10350
  HPATH = "instance-export"
10351
  HTYPE = constants.HTYPE_INSTANCE
10352
  REQ_BGL = False
10353

    
10354
  def CheckArguments(self):
10355
    """Check the arguments.
10356

10357
    """
10358
    self.x509_key_name = self.op.x509_key_name
10359
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10360

    
10361
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10362
      if not self.x509_key_name:
10363
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10364
                                   errors.ECODE_INVAL)
10365

    
10366
      if not self.dest_x509_ca_pem:
10367
        raise errors.OpPrereqError("Missing destination X509 CA",
10368
                                   errors.ECODE_INVAL)
10369

    
10370
  def ExpandNames(self):
10371
    self._ExpandAndLockInstance()
10372

    
10373
    # Lock all nodes for local exports
10374
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10375
      # FIXME: lock only instance primary and destination node
10376
      #
10377
      # Sad but true, for now we have to lock all nodes, as we don't know where
10378
      # the previous export might be, and in this LU we search for it and
10379
      # remove it from its current node. In the future we could fix this by:
10380
      #  - making a tasklet to search (share-lock all), then create the
10381
      #    new one, then one to remove, after
10382
      #  - removing the removal operation altogether
10383
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10384

    
10385
  def DeclareLocks(self, level):
10386
    """Last minute lock declaration."""
10387
    # All nodes are locked anyway, so nothing to do here.
10388

    
10389
  def BuildHooksEnv(self):
10390
    """Build hooks env.
10391

10392
    This will run on the master, primary node and target node.
10393

10394
    """
10395
    env = {
10396
      "EXPORT_MODE": self.op.mode,
10397
      "EXPORT_NODE": self.op.target_node,
10398
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10399
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10400
      # TODO: Generic function for boolean env variables
10401
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10402
      }
10403

    
10404
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10405

    
10406
    return env
10407

    
10408
  def BuildHooksNodes(self):
10409
    """Build hooks nodes.
10410

10411
    """
10412
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10413

    
10414
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10415
      nl.append(self.op.target_node)
10416

    
10417
    return (nl, nl)
10418

    
10419
  def CheckPrereq(self):
10420
    """Check prerequisites.
10421

10422
    This checks that the instance and node names are valid.
10423

10424
    """
10425
    instance_name = self.op.instance_name
10426

    
10427
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10428
    assert self.instance is not None, \
10429
          "Cannot retrieve locked instance %s" % self.op.instance_name
10430
    _CheckNodeOnline(self, self.instance.primary_node)
10431

    
10432
    if (self.op.remove_instance and self.instance.admin_up and
10433
        not self.op.shutdown):
10434
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10435
                                 " down before")
10436

    
10437
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10438
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10439
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10440
      assert self.dst_node is not None
10441

    
10442
      _CheckNodeOnline(self, self.dst_node.name)
10443
      _CheckNodeNotDrained(self, self.dst_node.name)
10444

    
10445
      self._cds = None
10446
      self.dest_disk_info = None
10447
      self.dest_x509_ca = None
10448

    
10449
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10450
      self.dst_node = None
10451

    
10452
      if len(self.op.target_node) != len(self.instance.disks):
10453
        raise errors.OpPrereqError(("Received destination information for %s"
10454
                                    " disks, but instance %s has %s disks") %
10455
                                   (len(self.op.target_node), instance_name,
10456
                                    len(self.instance.disks)),
10457
                                   errors.ECODE_INVAL)
10458

    
10459
      cds = _GetClusterDomainSecret()
10460

    
10461
      # Check X509 key name
10462
      try:
10463
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10464
      except (TypeError, ValueError), err:
10465
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10466

    
10467
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10468
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10469
                                   errors.ECODE_INVAL)
10470

    
10471
      # Load and verify CA
10472
      try:
10473
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10474
      except OpenSSL.crypto.Error, err:
10475
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10476
                                   (err, ), errors.ECODE_INVAL)
10477

    
10478
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10479
      if errcode is not None:
10480
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10481
                                   (msg, ), errors.ECODE_INVAL)
10482

    
10483
      self.dest_x509_ca = cert
10484

    
10485
      # Verify target information
10486
      disk_info = []
10487
      for idx, disk_data in enumerate(self.op.target_node):
10488
        try:
10489
          (host, port, magic) = \
10490
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10491
        except errors.GenericError, err:
10492
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10493
                                     (idx, err), errors.ECODE_INVAL)
10494

    
10495
        disk_info.append((host, port, magic))
10496

    
10497
      assert len(disk_info) == len(self.op.target_node)
10498
      self.dest_disk_info = disk_info
10499

    
10500
    else:
10501
      raise errors.ProgrammerError("Unhandled export mode %r" %
10502
                                   self.op.mode)
10503

    
10504
    # instance disk type verification
10505
    # TODO: Implement export support for file-based disks
10506
    for disk in self.instance.disks:
10507
      if disk.dev_type == constants.LD_FILE:
10508
        raise errors.OpPrereqError("Export not supported for instances with"
10509
                                   " file-based disks", errors.ECODE_INVAL)
10510

    
10511
  def _CleanupExports(self, feedback_fn):
10512
    """Removes exports of current instance from all other nodes.
10513

10514
    If an instance in a cluster with nodes A..D was exported to node C, its
10515
    exports will be removed from the nodes A, B and D.
10516

10517
    """
10518
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10519

    
10520
    nodelist = self.cfg.GetNodeList()
10521
    nodelist.remove(self.dst_node.name)
10522

    
10523
    # on one-node clusters nodelist will be empty after the removal
10524
    # if we proceed the backup would be removed because OpBackupQuery
10525
    # substitutes an empty list with the full cluster node list.
10526
    iname = self.instance.name
10527
    if nodelist:
10528
      feedback_fn("Removing old exports for instance %s" % iname)
10529
      exportlist = self.rpc.call_export_list(nodelist)
10530
      for node in exportlist:
10531
        if exportlist[node].fail_msg:
10532
          continue
10533
        if iname in exportlist[node].payload:
10534
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10535
          if msg:
10536
            self.LogWarning("Could not remove older export for instance %s"
10537
                            " on node %s: %s", iname, node, msg)
10538

    
10539
  def Exec(self, feedback_fn):
10540
    """Export an instance to an image in the cluster.
10541

10542
    """
10543
    assert self.op.mode in constants.EXPORT_MODES
10544

    
10545
    instance = self.instance
10546
    src_node = instance.primary_node
10547

    
10548
    if self.op.shutdown:
10549
      # shutdown the instance, but not the disks
10550
      feedback_fn("Shutting down instance %s" % instance.name)
10551
      result = self.rpc.call_instance_shutdown(src_node, instance,
10552
                                               self.op.shutdown_timeout)
10553
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10554
      result.Raise("Could not shutdown instance %s on"
10555
                   " node %s" % (instance.name, src_node))
10556

    
10557
    # set the disks ID correctly since call_instance_start needs the
10558
    # correct drbd minor to create the symlinks
10559
    for disk in instance.disks:
10560
      self.cfg.SetDiskID(disk, src_node)
10561

    
10562
    activate_disks = (not instance.admin_up)
10563

    
10564
    if activate_disks:
10565
      # Activate the instance disks if we're exporting a stopped instance
10566
      feedback_fn("Activating disks for %s" % instance.name)
10567
      _StartInstanceDisks(self, instance, None)
10568

    
10569
    try:
10570
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10571
                                                     instance)
10572

    
10573
      helper.CreateSnapshots()
10574
      try:
10575
        if (self.op.shutdown and instance.admin_up and
10576
            not self.op.remove_instance):
10577
          assert not activate_disks
10578
          feedback_fn("Starting instance %s" % instance.name)
10579
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10580
          msg = result.fail_msg
10581
          if msg:
10582
            feedback_fn("Failed to start instance: %s" % msg)
10583
            _ShutdownInstanceDisks(self, instance)
10584
            raise errors.OpExecError("Could not start instance: %s" % msg)
10585

    
10586
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10587
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10588
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10589
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10590
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10591

    
10592
          (key_name, _, _) = self.x509_key_name
10593

    
10594
          dest_ca_pem = \
10595
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10596
                                            self.dest_x509_ca)
10597

    
10598
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10599
                                                     key_name, dest_ca_pem,
10600
                                                     timeouts)
10601
      finally:
10602
        helper.Cleanup()
10603

    
10604
      # Check for backwards compatibility
10605
      assert len(dresults) == len(instance.disks)
10606
      assert compat.all(isinstance(i, bool) for i in dresults), \
10607
             "Not all results are boolean: %r" % dresults
10608

    
10609
    finally:
10610
      if activate_disks:
10611
        feedback_fn("Deactivating disks for %s" % instance.name)
10612
        _ShutdownInstanceDisks(self, instance)
10613

    
10614
    if not (compat.all(dresults) and fin_resu):
10615
      failures = []
10616
      if not fin_resu:
10617
        failures.append("export finalization")
10618
      if not compat.all(dresults):
10619
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10620
                               if not dsk)
10621
        failures.append("disk export: disk(s) %s" % fdsk)
10622

    
10623
      raise errors.OpExecError("Export failed, errors in %s" %
10624
                               utils.CommaJoin(failures))
10625

    
10626
    # At this point, the export was successful, we can cleanup/finish
10627

    
10628
    # Remove instance if requested
10629
    if self.op.remove_instance:
10630
      feedback_fn("Removing instance %s" % instance.name)
10631
      _RemoveInstance(self, feedback_fn, instance,
10632
                      self.op.ignore_remove_failures)
10633

    
10634
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10635
      self._CleanupExports(feedback_fn)
10636

    
10637
    return fin_resu, dresults
10638

    
10639

    
10640
class LUBackupRemove(NoHooksLU):
10641
  """Remove exports related to the named instance.
10642

10643
  """
10644
  REQ_BGL = False
10645

    
10646
  def ExpandNames(self):
10647
    self.needed_locks = {}
10648
    # We need all nodes to be locked in order for RemoveExport to work, but we
10649
    # don't need to lock the instance itself, as nothing will happen to it (and
10650
    # we can remove exports also for a removed instance)
10651
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10652

    
10653
  def Exec(self, feedback_fn):
10654
    """Remove any export.
10655

10656
    """
10657
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10658
    # If the instance was not found we'll try with the name that was passed in.
10659
    # This will only work if it was an FQDN, though.
10660
    fqdn_warn = False
10661
    if not instance_name:
10662
      fqdn_warn = True
10663
      instance_name = self.op.instance_name
10664

    
10665
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10666
    exportlist = self.rpc.call_export_list(locked_nodes)
10667
    found = False
10668
    for node in exportlist:
10669
      msg = exportlist[node].fail_msg
10670
      if msg:
10671
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10672
        continue
10673
      if instance_name in exportlist[node].payload:
10674
        found = True
10675
        result = self.rpc.call_export_remove(node, instance_name)
10676
        msg = result.fail_msg
10677
        if msg:
10678
          logging.error("Could not remove export for instance %s"
10679
                        " on node %s: %s", instance_name, node, msg)
10680

    
10681
    if fqdn_warn and not found:
10682
      feedback_fn("Export not found. If trying to remove an export belonging"
10683
                  " to a deleted instance please use its Fully Qualified"
10684
                  " Domain Name.")
10685

    
10686

    
10687
class LUGroupAdd(LogicalUnit):
10688
  """Logical unit for creating node groups.
10689

10690
  """
10691
  HPATH = "group-add"
10692
  HTYPE = constants.HTYPE_GROUP
10693
  REQ_BGL = False
10694

    
10695
  def ExpandNames(self):
10696
    # We need the new group's UUID here so that we can create and acquire the
10697
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10698
    # that it should not check whether the UUID exists in the configuration.
10699
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10700
    self.needed_locks = {}
10701
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10702

    
10703
  def CheckPrereq(self):
10704
    """Check prerequisites.
10705

10706
    This checks that the given group name is not an existing node group
10707
    already.
10708

10709
    """
10710
    try:
10711
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10712
    except errors.OpPrereqError:
10713
      pass
10714
    else:
10715
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10716
                                 " node group (UUID: %s)" %
10717
                                 (self.op.group_name, existing_uuid),
10718
                                 errors.ECODE_EXISTS)
10719

    
10720
    if self.op.ndparams:
10721
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10722

    
10723
  def BuildHooksEnv(self):
10724
    """Build hooks env.
10725

10726
    """
10727
    return {
10728
      "GROUP_NAME": self.op.group_name,
10729
      }
10730

    
10731
  def BuildHooksNodes(self):
10732
    """Build hooks nodes.
10733

10734
    """
10735
    mn = self.cfg.GetMasterNode()
10736
    return ([mn], [mn])
10737

    
10738
  def Exec(self, feedback_fn):
10739
    """Add the node group to the cluster.
10740

10741
    """
10742
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10743
                                  uuid=self.group_uuid,
10744
                                  alloc_policy=self.op.alloc_policy,
10745
                                  ndparams=self.op.ndparams)
10746

    
10747
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10748
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10749

    
10750

    
10751
class LUGroupAssignNodes(NoHooksLU):
10752
  """Logical unit for assigning nodes to groups.
10753

10754
  """
10755
  REQ_BGL = False
10756

    
10757
  def ExpandNames(self):
10758
    # These raise errors.OpPrereqError on their own:
10759
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10760
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10761

    
10762
    # We want to lock all the affected nodes and groups. We have readily
10763
    # available the list of nodes, and the *destination* group. To gather the
10764
    # list of "source" groups, we need to fetch node information.
10765
    self.node_data = self.cfg.GetAllNodesInfo()
10766
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10767
    affected_groups.add(self.group_uuid)
10768

    
10769
    self.needed_locks = {
10770
      locking.LEVEL_NODEGROUP: list(affected_groups),
10771
      locking.LEVEL_NODE: self.op.nodes,
10772
      }
10773

    
10774
  def CheckPrereq(self):
10775
    """Check prerequisites.
10776

10777
    """
10778
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10779
    instance_data = self.cfg.GetAllInstancesInfo()
10780

    
10781
    if self.group is None:
10782
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10783
                               (self.op.group_name, self.group_uuid))
10784

    
10785
    (new_splits, previous_splits) = \
10786
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10787
                                             for node in self.op.nodes],
10788
                                            self.node_data, instance_data)
10789

    
10790
    if new_splits:
10791
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10792

    
10793
      if not self.op.force:
10794
        raise errors.OpExecError("The following instances get split by this"
10795
                                 " change and --force was not given: %s" %
10796
                                 fmt_new_splits)
10797
      else:
10798
        self.LogWarning("This operation will split the following instances: %s",
10799
                        fmt_new_splits)
10800

    
10801
        if previous_splits:
10802
          self.LogWarning("In addition, these already-split instances continue"
10803
                          " to be spit across groups: %s",
10804
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
10805

    
10806
  def Exec(self, feedback_fn):
10807
    """Assign nodes to a new group.
10808

10809
    """
10810
    for node in self.op.nodes:
10811
      self.node_data[node].group = self.group_uuid
10812

    
10813
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10814

    
10815
  @staticmethod
10816
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10817
    """Check for split instances after a node assignment.
10818

10819
    This method considers a series of node assignments as an atomic operation,
10820
    and returns information about split instances after applying the set of
10821
    changes.
10822

10823
    In particular, it returns information about newly split instances, and
10824
    instances that were already split, and remain so after the change.
10825

10826
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10827
    considered.
10828

10829
    @type changes: list of (node_name, new_group_uuid) pairs.
10830
    @param changes: list of node assignments to consider.
10831
    @param node_data: a dict with data for all nodes
10832
    @param instance_data: a dict with all instances to consider
10833
    @rtype: a two-tuple
10834
    @return: a list of instances that were previously okay and result split as a
10835
      consequence of this change, and a list of instances that were previously
10836
      split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


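# The split check above can be exercised in isolation: given a node->group
# mapping and a set of proposed reassignments, an instance counts as "split"
# when its nodes end up in more than one group. A minimal sketch of that rule
# (the _DemoSplitCheck helper and its toy data are illustrative only, not used
# by LUGroupAssignNodes):
def _DemoSplitCheck():
  """Toy example of the split rule used by CheckAssignmentForSplitInstances.

  """
  node_group = {"node1": "g1", "node2": "g1", "node3": "g2"}
  changes = {"node2": "g2"}  # proposed move of node2 into group g2
  instance_nodes = ["node1", "node2"]  # primary node plus secondaries

  split_before = len(set(node_group[n] for n in instance_nodes)) > 1
  split_after = len(set(changes.get(n, node_group[n])
                        for n in instance_nodes)) > 1
  # split_before is False and split_after is True, so this instance would be
  # reported as newly split by the assignment.
  return (split_before, split_after)

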
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


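# The mapping built in _GroupQuery._GetQueryData boils down to two passes:
# nodes are bucketed by the group they belong to, and instances are then
# attributed to the group of their primary node. A standalone sketch of the
# same bucketing (the _DemoGroupMaps helper and its toy data are illustrative
# only):
def _DemoGroupMaps():
  """Toy example of the group->nodes and group->instances bucketing.

  """
  node_group = {"node1": "g1", "node2": "g1", "node3": "g2"}
  instance_pnode = {"inst1": "node1", "inst2": "node3"}

  group_to_nodes = {}
  for node, group in node_group.items():
    group_to_nodes.setdefault(group, []).append(node)

  group_to_instances = {}
  for inst, pnode in instance_pnode.items():
    group_to_instances.setdefault(node_group[pnode], []).append(inst)

  # e.g. g1 -> [node1, node2] and g2 -> [node3]; inst1 ends up under g1 and
  # inst2 under g2 (list order may vary).
  return (group_to_nodes, group_to_instances)

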
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


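# LUTagsSearch above walks every taggable object and records a (path, tag)
# pair for each tag matching the pattern. The same matching loop, reduced to
# plain dicts (the _DemoTagSearch helper and its sample tags are illustrative
# only):
def _DemoTagSearch(pattern):
  """Toy example of the (path, tag) matching done by LUTagsSearch.

  """
  sample = {
    "/cluster": ["env:prod"],
    "/instances/web1": ["env:prod", "owner:alice"],
    "/nodes/node1": ["rack:b2"],
    }
  regex = re.compile(pattern)
  return [(path, tag)
          for path, tags in sample.items()
          for tag in tags
          if regex.search(tag)]

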
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


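# The prerequisite check in LUTagsDel is plain set arithmetic: every tag to be
# removed must currently be present on the target object, and any leftover is
# reported back. A minimal sketch (the _DemoMissingTags helper is illustrative
# only):
def _DemoMissingTags(requested, current):
  """Toy example of the missing-tag check done by LUTagsDel.CheckPrereq.

  """
  missing = frozenset(requested) - frozenset(current)
  # e.g. requested=["a", "b"] and current=["a"] leaves ["b"] to report
  return sorted(missing)

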
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


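# The notification handshake above follows a fixed pattern: create a socket
# inside a fresh temporary directory (so the path cannot clash), hand the path
# to the client via a callback, then wait with a timeout for the client to
# connect. A condensed, standalone sketch of the same pattern (the
# _DemoWaitForClient name and its default timeout are illustrative only, not
# used by LUTestJqueue):
def _DemoWaitForClient(cb, timeout=10.0):
  """Toy version of the temporary Unix socket handshake.

  """
  tmpdir = tempfile.mkdtemp()
  try:
    path = utils.PathJoin(tmpdir, "sock")
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.bind(path)
      sock.listen(1)
      cb(path)                   # tell the client where to connect
      sock.settimeout(timeout)
      (conn, _) = sock.accept()  # raises socket.timeout if nobody shows up
      conn.close()
    finally:
      sock.close()
  finally:
    shutil.rmtree(tmpdir)

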
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

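  # Worked example for the free-memory adjustment above (numbers made up): an
  # instance configured with BE_MEMORY=1024 MiB that the hypervisor currently
  # reports at 512 MiB gives i_mem_diff = 1024 - 512 = 512, so a further
  # 512 MiB is subtracted from the node's reported 'memory_free'. The idea is
  # to treat memory the instance may still grow into as unavailable; when an
  # instance already uses at least its configured memory, max(0, i_mem_diff)
  # keeps the adjustment at zero.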
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      request_groups = fn(self.relocate_from)
      result_groups = fn(rdict["result"])

      if result_groups != request_groups:
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


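# Taken together, _ComputeClusterData, the per-mode _Add* methods and
# _BuildInputData produce a single dict that is serialized and handed to the
# external script. A condensed sketch of the "request" portion for the
# allocation mode, mirroring _AddNewInstance plus the "type" key added by
# _BuildInputData (the _DemoAllocRequest name and all sample values are
# illustrative only):
def _DemoAllocRequest():
  """Toy example of the request section built for IALLOCATOR_MODE_ALLOC.

  """
  return {
    "type": constants.IALLOCATOR_MODE_ALLOC,
    "name": "inst1.example.com",
    "disk_template": constants.DT_PLAIN,
    "tags": [],
    "os": "debootstrap+default",
    "vcpus": 1,
    "memory": 512,
    "disks": [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "rw"}],
    "disk_space_total": 1024,
    "nics": [],
    "required_nodes": 1,
    }

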
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implemtnation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
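

# _GetQueryImplementation is a thin dispatch over _QUERY_IMPL: callers look up
# the query class for a resource and instantiate it the same way the LUs above
# do. A minimal sketch, mirroring LUGroupQuery.CheckArguments (the
# _DemoGroupQueryFactory name and the field list are illustrative only):
def _DemoGroupQueryFactory():
  """Toy example of dispatching to a query implementation.

  """
  impl_cls = _GetQueryImplementation(constants.QR_GROUP)
  # An empty name list means "all groups"; only the "name" field is requested
  # and locking is not used.
  return impl_cls(qlang.MakeSimpleFilter("name", []), ["name"], False)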