#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs

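# Illustrative sketch (not part of the original module): how an LU's Exec
# method could hand follow-up jobs back to the processor by returning a
# ResultWithJobs instance. The choice of opcodes.OpTestDelay is only an
# example; any list of lists of opcodes would do.
def _ExampleResultWithJobs():
  """Builds a ResultWithJobs carrying one single-opcode follow-up job.

  """
  # Each inner list is one job; extra keyword arguments are kept in C{other}
  return ResultWithJobs([[opcodes.OpTestDelay(duration=1.0)]],
                        comment="submitted by illustrative example code")
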
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]

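# Illustrative sketch (not part of the original module): a minimal LU that
# follows the rules listed in the LogicalUnit docstring above. It is not a
# real Ganeti opcode implementation; the hooks path name is made up.
class _ExampleLUNoop(LogicalUnit):
  """No-op LU used only to illustrate the required methods.

  """
  HPATH = "example-noop"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def ExpandNames(self):
    # No locks needed; an empty dict is mandatory, None is not allowed
    self.needed_locks = {}

  def BuildHooksEnv(self):
    return {"OP_TARGET": self.cfg.GetClusterName()}

  def BuildHooksNodes(self):
    return ([], [self.cfg.GetMasterNode()])

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do")
    return True
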
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError

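# Illustrative sketch (not part of the original module): a trivial tasklet
# and the way a hypothetical LU would queue it, so that the generic
# CheckPrereq/Exec loops in LogicalUnit run it.
class _ExampleNoopTasklet(Tasklet):
  """Tasklet that only emits a feedback message.

  """
  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    feedback_fn("example tasklet executed")

# In such an LU's ExpandNames one would then set, for instance:
#   self.needed_locks = {}
#   self.tasklets = [_ExampleNoopTasklet(self)]
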
class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy

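# Illustrative sketch (not part of the original module): the merge/reset
# semantics of _GetUpdatedParams. The parameter names below are only
# example dictionary keys.
def _ExampleGetUpdatedParamsUsage():
  """Shows how VALUE_DEFAULT entries reset parameters to their defaults.

  """
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
  # "kernel_path" is removed (so the cluster default applies again),
  # "root_path" is kept and "serial_console" is added, giving
  # {"root_path": "/dev/vda1", "serial_console": True}
  return _GetUpdatedParams(old, update)
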
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env

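# Illustrative sketch (not part of the original module): the kind of call
# and environment _BuildInstanceHookEnv deals with. All names and sizes
# below are invented for the example.
def _ExampleInstanceHookEnv():
  """Returns a sample hook environment for a one-NIC, one-disk instance.

  """
  # Yields, among others, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_STATUS
  # ("up"), INSTANCE_NIC0_MAC, INSTANCE_NIC0_BRIDGE and INSTANCE_DISK0_SIZE
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                               ["node2.example.com"], "debootstrap+default",
                               True, 512, 1,
                               [("192.0.2.10", "aa:00:00:00:00:01",
                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
                               constants.DT_DRBD8, [(10240, "rw")],
                               {}, {}, constants.HT_XEN_PVM)
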
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)

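# Illustrative sketch (not part of the original module): OS variants are
# requested by appending "+<variant>" to the OS name, e.g.
# "debootstrap+default"; a bare name is rejected by _CheckOSVariant when
# the OS declares supported variants.
def _ExampleOsVariantSplit():
  """Shows how the variant part of a user-supplied OS name is extracted.

  """
  # Returns "default" here; for a plain "debootstrap" it would return an
  # empty value, which _CheckOSVariant turns into an OpPrereqError
  return objects.OS.GetVariant("debootstrap+default")
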
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")

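# Illustrative sketch (not part of the original module): an LU that accepts
# either an iallocator or an explicit target node would typically call the
# helper above from its CheckArguments, e.g.:
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
#
# The slot names are examples; they must match the LU's opcode attributes.
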
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)

1254
class LUClusterVerify(LogicalUnit):
1255
  """Verifies the cluster status.
1256

1257
  """
1258
  HPATH = "cluster-verify"
1259
  HTYPE = constants.HTYPE_CLUSTER
1260
  REQ_BGL = False
1261

    
1262
  TCLUSTER = "cluster"
1263
  TNODE = "node"
1264
  TINSTANCE = "instance"
1265

    
1266
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1267
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1268
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1269
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1270
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1271
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1272
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1273
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1274
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1275
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1276
  ENODEDRBD = (TNODE, "ENODEDRBD")
1277
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1278
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1279
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1280
  ENODEHV = (TNODE, "ENODEHV")
1281
  ENODELVM = (TNODE, "ENODELVM")
1282
  ENODEN1 = (TNODE, "ENODEN1")
1283
  ENODENET = (TNODE, "ENODENET")
1284
  ENODEOS = (TNODE, "ENODEOS")
1285
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1286
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1287
  ENODERPC = (TNODE, "ENODERPC")
1288
  ENODESSH = (TNODE, "ENODESSH")
1289
  ENODEVERSION = (TNODE, "ENODEVERSION")
1290
  ENODESETUP = (TNODE, "ENODESETUP")
1291
  ENODETIME = (TNODE, "ENODETIME")
1292
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1293

    
1294
  ETYPE_FIELD = "code"
1295
  ETYPE_ERROR = "ERROR"
1296
  ETYPE_WARNING = "WARNING"
1297

    
1298
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1299

    
1300
  class NodeImage(object):
1301
    """A class representing the logical and physical status of a node.
1302

1303
    @type name: string
1304
    @ivar name: the node name to which this object refers
1305
    @ivar volumes: a structure as returned from
1306
        L{ganeti.backend.GetVolumeList} (runtime)
1307
    @ivar instances: a list of running instances (runtime)
1308
    @ivar pinst: list of configured primary instances (config)
1309
    @ivar sinst: list of configured secondary instances (config)
1310
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1311
        instances for which this node is secondary (config)
1312
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1313
    @ivar dfree: free disk, as reported by the node (runtime)
1314
    @ivar offline: the offline status (config)
1315
    @type rpc_fail: boolean
1316
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1317
        not whether the individual keys were correct) (runtime)
1318
    @type lvm_fail: boolean
1319
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1320
    @type hyp_fail: boolean
1321
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1322
    @type ghost: boolean
1323
    @ivar ghost: whether this is a known node or not (config)
1324
    @type os_fail: boolean
1325
    @ivar os_fail: whether the RPC call didn't return valid OS data
1326
    @type oslist: list
1327
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1328
    @type vm_capable: boolean
1329
    @ivar vm_capable: whether the node can host instances
1330

1331
    """
1332
    def __init__(self, offline=False, name=None, vm_capable=True):
1333
      self.name = name
1334
      self.volumes = {}
1335
      self.instances = []
1336
      self.pinst = []
1337
      self.sinst = []
1338
      self.sbp = {}
1339
      self.mfree = 0
1340
      self.dfree = 0
1341
      self.offline = offline
1342
      self.vm_capable = vm_capable
1343
      self.rpc_fail = False
1344
      self.lvm_fail = False
1345
      self.hyp_fail = False
1346
      self.ghost = False
1347
      self.os_fail = False
1348
      self.oslist = {}
1349

    
1350
  def ExpandNames(self):
1351
    self.needed_locks = {
1352
      locking.LEVEL_NODE: locking.ALL_SET,
1353
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1354
    }
1355
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1356

    
1357
  def _Error(self, ecode, item, msg, *args, **kwargs):
1358
    """Format an error message.
1359

1360
    Based on the opcode's error_codes parameter, either format a
1361
    parseable error code, or a simpler error string.
1362

1363
    This must be called only from Exec and functions called from Exec.
1364

1365
    """
1366
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1367
    itype, etxt = ecode
1368
    # first complete the msg
1369
    if args:
1370
      msg = msg % args
1371
    # then format the whole message
1372
    if self.op.error_codes:
1373
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1374
    else:
1375
      if item:
1376
        item = " " + item
1377
      else:
1378
        item = ""
1379
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1380
    # and finally report it via the feedback_fn
1381
    self._feedback_fn("  - %s" % msg)
1382

    
1383
  def _ErrorIf(self, cond, *args, **kwargs):
1384
    """Log an error message if the passed condition is True.
1385

1386
    """
1387
    cond = bool(cond) or self.op.debug_simulate_errors
1388
    if cond:
1389
      self._Error(*args, **kwargs)
1390
    # do not mark the operation as failed for WARN cases only
1391
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1392
      self.bad = self.bad or cond
1393

    
1394
  def _VerifyNode(self, ninfo, nresult):
1395
    """Perform some basic validation on data returned from a node.
1396

1397
      - check the result data structure is well formed and has all the
1398
        mandatory fields
1399
      - check ganeti version
1400

1401
    @type ninfo: L{objects.Node}
1402
    @param ninfo: the node to check
1403
    @param nresult: the results from the node
1404
    @rtype: boolean
1405
    @return: whether overall this call was successful (and we can expect
1406
         reasonable values in the respose)
1407

1408
    """
1409
    node = ninfo.name
1410
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1411

    
1412
    # main result, nresult should be a non-empty dict
1413
    test = not nresult or not isinstance(nresult, dict)
1414
    _ErrorIf(test, self.ENODERPC, node,
1415
                  "unable to verify node: no data returned")
1416
    if test:
1417
      return False
1418

    
1419
    # compares ganeti version
1420
    local_version = constants.PROTOCOL_VERSION
1421
    remote_version = nresult.get("version", None)
1422
    test = not (remote_version and
1423
                isinstance(remote_version, (list, tuple)) and
1424
                len(remote_version) == 2)
1425
    _ErrorIf(test, self.ENODERPC, node,
1426
             "connection to node returned invalid data")
1427
    if test:
1428
      return False
1429

    
1430
    test = local_version != remote_version[0]
1431
    _ErrorIf(test, self.ENODEVERSION, node,
1432
             "incompatible protocol versions: master %s,"
1433
             " node %s", local_version, remote_version[0])
1434
    if test:
1435
      return False
1436

    
1437
    # node seems compatible, we can actually try to look into its results
1438

    
1439
    # full package version
1440
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1441
                  self.ENODEVERSION, node,
1442
                  "software version mismatch: master %s, node %s",
1443
                  constants.RELEASE_VERSION, remote_version[1],
1444
                  code=self.ETYPE_WARNING)
1445

    
1446
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1447
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1448
      for hv_name, hv_result in hyp_result.iteritems():
1449
        test = hv_result is not None
1450
        _ErrorIf(test, self.ENODEHV, node,
1451
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1452

    
1453
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1454
    if ninfo.vm_capable and isinstance(hvp_result, list):
1455
      for item, hv_name, hv_result in hvp_result:
1456
        _ErrorIf(True, self.ENODEHV, node,
1457
                 "hypervisor %s parameter verify failure (source %s): %s",
1458
                 hv_name, item, hv_result)
1459

    
1460
    test = nresult.get(constants.NV_NODESETUP,
1461
                           ["Missing NODESETUP results"])
1462
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1463
             "; ".join(test))
1464

    
1465
    return True
1466

    
1467
  def _VerifyNodeTime(self, ninfo, nresult,
1468
                      nvinfo_starttime, nvinfo_endtime):
1469
    """Check the node time.
1470

1471
    @type ninfo: L{objects.Node}
1472
    @param ninfo: the node to check
1473
    @param nresult: the remote results for the node
1474
    @param nvinfo_starttime: the start time of the RPC call
1475
    @param nvinfo_endtime: the end time of the RPC call
1476

1477
    """
1478
    node = ninfo.name
1479
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1480

    
1481
    ntime = nresult.get(constants.NV_TIME, None)
1482
    try:
1483
      ntime_merged = utils.MergeTime(ntime)
1484
    except (ValueError, TypeError):
1485
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1486
      return
1487

    
1488
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1489
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1490
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1491
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1492
    else:
1493
      ntime_diff = None
1494

    
1495
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1496
             "Node time diverges by at least %s from master node time",
1497
             ntime_diff)
1498

    
1499
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

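  # Illustrative example for the N+1 check above (made-up numbers): if node A
  # is secondary for instances I1 (BE_MEMORY=1024) and I2 (BE_MEMORY=2048),
  # both auto-balanced and both with primary node B, then needed_mem for the
  # (A, B) pair is 3072 MiB, and ENODEN1 is raised when A reports less free
  # memory than that.
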
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no nodes (not"
                " found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

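  # For reference, the node predicates built in _VerifyFiles above map the
  # four file sets roughly as follows:
  #   files_all, files_all_opt -> checked on every node (fn is None)
  #   files_mc                 -> master candidates plus the master itself
  #   files_vm                 -> vm_capable nodes only
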
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

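  # Shape of the structure returned by _CollectDiskInfo above, with made-up
  # names for illustration:
  #   {"inst1": {"node1": [(True, bdev_status0), (True, bdev_status1)]},
  #    "inst2": {"node2": [(False, "node offline")]},
  #    "diskless": {}}
  # i.e. one (success, payload) tuple per disk index, per node, per instance.
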
  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    """
    cfg = self.cfg

    env = {
      "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in cfg.GetAllNodesInfo().values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.cfg.GetNodeList())

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    # Compute the set of hypervisor parameters
    hvp_data = []
    for hv_name in hypervisors:
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
    for os_name, os_hvp in cluster.os_hvp.items():
      for hv_name, hv_params in os_hvp.items():
        if not hv_params:
          continue
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
    # TODO: collapse identical parameter values in a single one
    for instance in instanceinfo.values():
      if not instance.hvparams:
        continue
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
    # and verify them locally
    self._VerifyHVP(hvp_data)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2247
    node_verify_param = {
2248
      constants.NV_FILELIST:
2249
        utils.UniqueSequence(filename
2250
                             for files in filemap
2251
                             for filename in files),
2252
      constants.NV_NODELIST: [node.name for node in nodeinfo
2253
                              if not node.offline],
2254
      constants.NV_HYPERVISOR: hypervisors,
2255
      constants.NV_HVPARAMS: hvp_data,
2256
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2257
                                  node.secondary_ip) for node in nodeinfo
2258
                                 if not node.offline],
2259
      constants.NV_INSTANCELIST: hypervisors,
2260
      constants.NV_VERSION: None,
2261
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2262
      constants.NV_NODESETUP: None,
2263
      constants.NV_TIME: None,
2264
      constants.NV_MASTERIP: (master_node, master_ip),
2265
      constants.NV_OSLIST: None,
2266
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2267
      }
2268

    
2269
    if vg_name is not None:
2270
      node_verify_param[constants.NV_VGLIST] = None
2271
      node_verify_param[constants.NV_LVLIST] = vg_name
2272
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2273
      node_verify_param[constants.NV_DRBDLIST] = None
2274

    
2275
    if drbd_helper:
2276
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2277

    
2278
    # Build our expected cluster state
2279
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2280
                                                 name=node.name,
2281
                                                 vm_capable=node.vm_capable))
2282
                      for node in nodeinfo)
2283

    
2284
    # Gather OOB paths
2285
    oob_paths = []
2286
    for node in nodeinfo:
2287
      path = _SupportsOob(self.cfg, node)
2288
      if path and path not in oob_paths:
2289
        oob_paths.append(path)
2290

    
2291
    if oob_paths:
2292
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2293

    
2294
    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying configuration file consistency")
    self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
2390
    for instance in instancelist:
2391
      if verbose:
2392
        feedback_fn("* Verifying instance %s" % instance)
2393
      inst_config = instanceinfo[instance]
2394
      self._VerifyInstance(instance, inst_config, node_image,
2395
                           instdisk[instance])
2396
      inst_nodes_offline = []
2397

    
2398
      pnode = inst_config.primary_node
2399
      pnode_img = node_image[pnode]
2400
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2401
               self.ENODERPC, pnode, "instance %s, connection to"
2402
               " primary node failed", instance)
2403

    
2404
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2405
               self.EINSTANCEBADNODE, instance,
2406
               "instance is marked as running and lives on offline node %s",
2407
               inst_config.primary_node)
2408

    
2409
      # If the instance is non-redundant we cannot survive losing its primary
2410
      # node, so we are not N+1 compliant. On the other hand we have no disk
2411
      # templates with more than one secondary so that situation is not well
2412
      # supported either.
2413
      # FIXME: does not support file-backed instances
2414
      if not inst_config.secondary_nodes:
2415
        i_non_redundant.append(instance)
2416

    
2417
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2418
               instance, "instance has multiple secondary nodes: %s",
2419
               utils.CommaJoin(inst_config.secondary_nodes),
2420
               code=self.ETYPE_WARNING)
2421

    
2422
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2423
        pnode = inst_config.primary_node
2424
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2425
        instance_groups = {}
2426

    
2427
        for node in instance_nodes:
2428
          instance_groups.setdefault(nodeinfo_byname[node].group,
2429
                                     []).append(node)
2430

    
2431
        pretty_list = [
2432
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2433
          # Sort so that we always list the primary node first.
2434
          for group, nodes in sorted(instance_groups.items(),
2435
                                     key=lambda (_, nodes): pnode in nodes,
2436
                                     reverse=True)]
2437

    
2438
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2439
                      instance, "instance has primary and secondary nodes in"
2440
                      " different groups: %s", utils.CommaJoin(pretty_list),
2441
                      code=self.ETYPE_WARNING)
2442

    
2443
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2444
        i_non_a_balanced.append(instance)
2445

    
2446
      for snode in inst_config.secondary_nodes:
2447
        s_img = node_image[snode]
2448
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2449
                 "instance %s, connection to secondary node failed", instance)
2450

    
2451
        if s_img.offline:
2452
          inst_nodes_offline.append(snode)
2453

    
2454
      # warn that the instance lives on offline nodes
2455
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2456
               "instance has offline secondary node(s) %s",
2457
               utils.CommaJoin(inst_nodes_offline))
2458
      # ... or ghost/non-vm_capable nodes
2459
      for node in inst_config.all_nodes:
2460
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2461
                 "instance lives on ghost node %s", node)
2462
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2463
                 instance, "instance lives on non-vm_capable node %s", node)
2464

    
2465
    feedback_fn("* Verifying orphan volumes")
2466
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2467
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2468

    
2469
    feedback_fn("* Verifying orphan instances")
2470
    self._VerifyOrphanInstances(instancelist, node_image)
2471

    
2472
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2473
      feedback_fn("* Verifying N+1 Memory redundancy")
2474
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2475

    
2476
    feedback_fn("* Other Notes")
2477
    if i_non_redundant:
2478
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2479
                  % len(i_non_redundant))
2480

    
2481
    if i_non_a_balanced:
2482
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2483
                  % len(i_non_a_balanced))
2484

    
2485
    if n_offline:
2486
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2487

    
2488
    if n_drained:
2489
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2490

    
2491
    return not self.bad
2492

    
2493
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    instances = self.cfg.GetAllInstancesInfo().values()

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if not inst.admin_up:
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, [])
    for node, node_res in node_lvs.items():
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
2806
  """Change the parameters of the cluster.
2807

2808
  """
2809
  HPATH = "cluster-modify"
2810
  HTYPE = constants.HTYPE_CLUSTER
2811
  REQ_BGL = False
2812

    
2813
  def CheckArguments(self):
2814
    """Check parameters
2815

2816
    """
2817
    if self.op.uid_pool:
2818
      uidpool.CheckUidPool(self.op.uid_pool)
2819

    
2820
    if self.op.add_uids:
2821
      uidpool.CheckUidPool(self.op.add_uids)
2822

    
2823
    if self.op.remove_uids:
2824
      uidpool.CheckUidPool(self.op.remove_uids)
2825

    
2826
  def ExpandNames(self):
2827
    # FIXME: in the future maybe other cluster params won't require checking on
2828
    # all nodes to be modified.
2829
    self.needed_locks = {
2830
      locking.LEVEL_NODE: locking.ALL_SET,
2831
    }
2832
    self.share_locks[locking.LEVEL_NODE] = 1
2833

    
2834
  def BuildHooksEnv(self):
2835
    """Build hooks env.
2836

2837
    """
2838
    return {
2839
      "OP_TARGET": self.cfg.GetClusterName(),
2840
      "NEW_VG_NAME": self.op.vg_name,
2841
      }
2842

    
2843
  def BuildHooksNodes(self):
2844
    """Build hooks nodes.
2845

2846
    """
2847
    mn = self.cfg.GetMasterNode()
2848
    return ([mn], [mn])
2849

    
2850
  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


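# Illustrative note on LUClusterSetParams.Exec above (assumed format, taken
# from the helper_os closure): the hidden_os/blacklisted_os opcode fields are
# lists of (action, os_name) pairs such as
#   [(constants.DDM_ADD, "debian-image"), (constants.DDM_REMOVE, "old-image")]
# where "debian-image"/"old-image" are made-up OS names and any action other
# than DDM_ADD/DDM_REMOVE raises ProgrammerError.
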
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


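# Usage sketch for _UploadHelper above (illustrative only, not a call made
# here): pushing a single ancillary file to all online nodes could look like
#   _UploadHelper(lu, lu.cfg.GetOnlineNodeList(), constants.ETC_HOSTS)
# Files missing on the node running the LU are skipped by the
# os.path.exists() guard, and per-node copy failures are only logged as
# warnings, never raised.
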
def _ComputeAncillaryFiles(cluster, redist):
3157
  """Compute files external to Ganeti which need to be consistent.
3158

3159
  @type redist: boolean
3160
  @param redist: Whether to include files which need to be redistributed
3161

3162
  """
3163
  # Compute files for all nodes
3164
  files_all = set([
3165
    constants.SSH_KNOWN_HOSTS_FILE,
3166
    constants.CONFD_HMAC_KEY,
3167
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3168
    ])
3169

    
3170
  if not redist:
3171
    files_all.update(constants.ALL_CERT_FILES)
3172
    files_all.update(ssconf.SimpleStore().GetFileList())
3173

    
3174
  if cluster.modify_etc_hosts:
3175
    files_all.add(constants.ETC_HOSTS)
3176

    
3177
  # Files which must either exist on all nodes or on none
3178
  files_all_opt = set([
3179
    constants.RAPI_USERS_FILE,
3180
    ])
3181

    
3182
  # Files which should only be on master candidates
3183
  files_mc = set()
3184
  if not redist:
3185
    files_mc.add(constants.CLUSTER_CONF_FILE)
3186

    
3187
  # Files which should only be on VM-capable nodes
3188
  files_vm = set(filename
3189
    for hv_name in cluster.enabled_hypervisors
3190
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3191

    
3192
  # Filenames must be unique
3193
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3194
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3195
         "Found file listed in more than one file list"
3196

    
3197
  return (files_all, files_all_opt, files_mc, files_vm)
3198

    
3199

    
3200
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3201
  """Distribute additional files which are part of the cluster configuration.
3202

3203
  ConfigWriter takes care of distributing the config and ssconf files, but
3204
  there are more files which should be distributed to all nodes. This function
3205
  makes sure those are copied.
3206

3207
  @param lu: calling logical unit
3208
  @param additional_nodes: list of nodes not in the config to distribute to
3209
  @type additional_vm: boolean
3210
  @param additional_vm: whether the additional nodes are vm-capable or not
3211

3212
  """
3213
  # Gather target nodes
3214
  cluster = lu.cfg.GetClusterInfo()
3215
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3216

    
3217
  online_nodes = lu.cfg.GetOnlineNodeList()
3218
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3219

    
3220
  if additional_nodes is not None:
3221
    online_nodes.extend(additional_nodes)
3222
    if additional_vm:
3223
      vm_nodes.extend(additional_nodes)
3224

    
3225
  # Never distribute to master node
3226
  for nodelist in [online_nodes, vm_nodes]:
3227
    if master_info.name in nodelist:
3228
      nodelist.remove(master_info.name)
3229

    
3230
  # Gather file lists
3231
  (files_all, files_all_opt, files_mc, files_vm) = \
3232
    _ComputeAncillaryFiles(cluster, True)
3233

    
3234
  # Never re-distribute configuration file from here
3235
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3236
              constants.CLUSTER_CONF_FILE in files_vm)
3237
  assert not files_mc, "Master candidates not handled in this function"
3238

    
3239
  filemap = [
3240
    (online_nodes, files_all),
3241
    (online_nodes, files_all_opt),
3242
    (vm_nodes, files_vm),
3243
    ]
3244

    
3245
  # Upload the files
3246
  for (node_list, files) in filemap:
3247
    for fname in files:
3248
      _UploadHelper(lu, node_list, fname)
3249

    
3250

    
3251
class LUClusterRedistConf(NoHooksLU):
3252
  """Force the redistribution of cluster configuration.
3253

3254
  This is a very simple LU.
3255

3256
  """
3257
  REQ_BGL = False
3258

    
3259
  def ExpandNames(self):
3260
    self.needed_locks = {
3261
      locking.LEVEL_NODE: locking.ALL_SET,
3262
    }
3263
    self.share_locks[locking.LEVEL_NODE] = 1
3264

    
3265
  def Exec(self, feedback_fn):
3266
    """Redistribute the configuration.
3267

3268
    """
3269
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3270
    _RedistributeAncillaryFiles(self)
3271

    
3272

    
3273
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3274
  """Sleep and poll for an instance's disk to sync.
3275

3276
  """
3277
  if not instance.disks or disks is not None and not disks:
3278
    return True
3279

    
3280
  disks = _ExpandCheckDisks(instance, disks)
3281

    
3282
  if not oneshot:
3283
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3284

    
3285
  node = instance.primary_node
3286

    
3287
  for dev in disks:
3288
    lu.cfg.SetDiskID(dev, node)
3289

    
3290
  # TODO: Convert to utils.Retry
3291

    
3292
  retries = 0
3293
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3294
  while True:
3295
    max_time = 0
3296
    done = True
3297
    cumul_degraded = False
3298
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3299
    msg = rstats.fail_msg
3300
    if msg:
3301
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3302
      retries += 1
3303
      if retries >= 10:
3304
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3305
                                 " aborting." % node)
3306
      time.sleep(6)
3307
      continue
3308
    rstats = rstats.payload
3309
    retries = 0
3310
    for i, mstat in enumerate(rstats):
3311
      if mstat is None:
3312
        lu.LogWarning("Can't compute data for node %s/%s",
3313
                           node, disks[i].iv_name)
3314
        continue
3315

    
3316
      cumul_degraded = (cumul_degraded or
3317
                        (mstat.is_degraded and mstat.sync_percent is None))
3318
      if mstat.sync_percent is not None:
3319
        done = False
3320
        if mstat.estimated_time is not None:
3321
          rem_time = ("%s remaining (estimated)" %
3322
                      utils.FormatSeconds(mstat.estimated_time))
3323
          max_time = mstat.estimated_time
3324
        else:
3325
          rem_time = "no time estimate"
3326
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3327
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3328

    
3329
    # if we're done but degraded, let's do a few small retries, to
3330
    # make sure we see a stable and not transient situation; therefore
3331
    # we force restart of the loop
3332
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3333
      logging.info("Degraded disks found, %d retries left", degr_retries)
3334
      degr_retries -= 1
3335
      time.sleep(1)
3336
      continue
3337

    
3338
    if done or oneshot:
3339
      break
3340

    
3341
    time.sleep(min(60, max_time))
3342

    
3343
  if done:
3344
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3345
  return not cumul_degraded
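# Typical call pattern for _WaitForSync above (assumed from its code, not
# prescribed elsewhere): _WaitForSync(lu, instance) polls the primary node
# until no disk reports a sync_percent, sleeping up to 60 seconds between
# rounds; oneshot=True performs a single status round instead, which callers
# can use as a quick degraded/healthy check via the boolean return value.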
3346

    
3347

    
3348
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3349
  """Check that mirrors are not degraded.
3350

3351
  The ldisk parameter, if True, will change the test from the
3352
  is_degraded attribute (which represents overall non-ok status for
3353
  the device(s)) to the ldisk (representing the local storage status).
3354

3355
  """
3356
  lu.cfg.SetDiskID(dev, node)
3357

    
3358
  result = True
3359

    
3360
  if on_primary or dev.AssembleOnSecondary():
3361
    rstats = lu.rpc.call_blockdev_find(node, dev)
3362
    msg = rstats.fail_msg
3363
    if msg:
3364
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3365
      result = False
3366
    elif not rstats.payload:
3367
      lu.LogWarning("Can't find disk on node %s", node)
3368
      result = False
3369
    else:
3370
      if ldisk:
3371
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3372
      else:
3373
        result = result and not rstats.payload.is_degraded
3374

    
3375
  if dev.children:
3376
    for child in dev.children:
3377
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3378

    
3379
  return result
3380

    
3381

    
3382
class LUOobCommand(NoHooksLU):
3383
  """Logical unit for OOB handling.
3384

3385
  """
3386
  REQ_BGL = False
3387
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3388

    
3389
  def CheckPrereq(self):
3390
    """Check prerequisites.
3391

3392
    This checks:
3393
     - the node exists in the configuration
3394
     - OOB is supported
3395

3396
    Any errors are signaled by raising errors.OpPrereqError.
3397

3398
    """
3399
    self.nodes = []
3400
    self.master_node = self.cfg.GetMasterNode()
3401

    
3402
    assert self.op.power_delay >= 0.0
3403

    
3404
    if self.op.node_names:
3405
      if self.op.command in self._SKIP_MASTER:
3406
        if self.master_node in self.op.node_names:
3407
          master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3408
          master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3409

    
3410
          if master_oob_handler:
3411
            additional_text = ("Run '%s %s %s' if you want to operate on the"
3412
                               " master regardless") % (master_oob_handler,
3413
                                                        self.op.command,
3414
                                                        self.master_node)
3415
          else:
3416
            additional_text = "The master node does not support out-of-band"
3417

    
3418
          raise errors.OpPrereqError(("Operating on the master node %s is not"
3419
                                      " allowed for %s\n%s") %
3420
                                     (self.master_node, self.op.command,
3421
                                      additional_text), errors.ECODE_INVAL)
3422
    else:
3423
      self.op.node_names = self.cfg.GetNodeList()
3424
      if self.op.command in self._SKIP_MASTER:
3425
        self.op.node_names.remove(self.master_node)
3426

    
3427
    if self.op.command in self._SKIP_MASTER:
3428
      assert self.master_node not in self.op.node_names
3429

    
3430
    for node_name in self.op.node_names:
3431
      node = self.cfg.GetNodeInfo(node_name)
3432

    
3433
      if node is None:
3434
        raise errors.OpPrereqError("Node %s not found" % node_name,
3435
                                   errors.ECODE_NOENT)
3436
      else:
3437
        self.nodes.append(node)
3438

    
3439
      if (not self.op.ignore_status and
3440
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3441
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3442
                                    " not marked offline") % node_name,
3443
                                   errors.ECODE_STATE)
3444

    
3445
  def ExpandNames(self):
3446
    """Gather locks we need.
3447

3448
    """
3449
    if self.op.node_names:
3450
      self.op.node_names = [_ExpandNodeName(self.cfg, name)
3451
                            for name in self.op.node_names]
3452
      lock_names = self.op.node_names
3453
    else:
3454
      lock_names = locking.ALL_SET
3455

    
3456
    self.needed_locks = {
3457
      locking.LEVEL_NODE: lock_names,
3458
      }
3459

    
3460
  def Exec(self, feedback_fn):
3461
    """Execute OOB and return result if we expect any.
3462

3463
    """
3464
    master_node = self.master_node
3465
    ret = []
3466

    
3467
    for idx, node in enumerate(self.nodes):
3468
      node_entry = [(constants.RS_NORMAL, node.name)]
3469
      ret.append(node_entry)
3470

    
3471
      oob_program = _SupportsOob(self.cfg, node)
3472

    
3473
      if not oob_program:
3474
        node_entry.append((constants.RS_UNAVAIL, None))
3475
        continue
3476

    
3477
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3478
                   self.op.command, oob_program, node.name)
3479
      result = self.rpc.call_run_oob(master_node, oob_program,
3480
                                     self.op.command, node.name,
3481
                                     self.op.timeout)
3482

    
3483
      if result.fail_msg:
3484
        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3485
                        node.name, result.fail_msg)
3486
        node_entry.append((constants.RS_NODATA, None))
3487
      else:
3488
        try:
3489
          self._CheckPayload(result)
3490
        except errors.OpExecError, err:
3491
          self.LogWarning("The payload returned by '%s' is not valid: %s",
3492
                          node.name, err)
3493
          node_entry.append((constants.RS_NODATA, None))
3494
        else:
3495
          if self.op.command == constants.OOB_HEALTH:
3496
            # For health we should log important events
3497
            for item, status in result.payload:
3498
              if status in [constants.OOB_STATUS_WARNING,
3499
                            constants.OOB_STATUS_CRITICAL]:
3500
                self.LogWarning("On node '%s' item '%s' has status '%s'",
3501
                                node.name, item, status)
3502

    
3503
          if self.op.command == constants.OOB_POWER_ON:
3504
            node.powered = True
3505
          elif self.op.command == constants.OOB_POWER_OFF:
3506
            node.powered = False
3507
          elif self.op.command == constants.OOB_POWER_STATUS:
3508
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3509
            if powered != node.powered:
3510
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3511
                               " match actual power state (%s)"), node.powered,
3512
                              node.name, powered)
3513

    
3514
          # For configuration changing commands we should update the node
3515
          if self.op.command in (constants.OOB_POWER_ON,
3516
                                 constants.OOB_POWER_OFF):
3517
            self.cfg.Update(node, feedback_fn)
3518

    
3519
          node_entry.append((constants.RS_NORMAL, result.payload))
3520

    
3521
          if (self.op.command == constants.OOB_POWER_ON and
3522
              idx < len(self.nodes) - 1):
3523
            time.sleep(self.op.power_delay)
3524

    
3525
    return ret
3526

    
3527
  def _CheckPayload(self, result):
3528
    """Checks if the payload is valid.
3529

3530
    @param result: RPC result
3531
    @raises errors.OpExecError: If payload is not valid
3532

3533
    """
3534
    errs = []
3535
    if self.op.command == constants.OOB_HEALTH:
3536
      if not isinstance(result.payload, list):
3537
        errs.append("command 'health' is expected to return a list but got %s" %
3538
                    type(result.payload))
3539
      else:
3540
        for item, status in result.payload:
3541
          if status not in constants.OOB_STATUSES:
3542
            errs.append("health item '%s' has invalid status '%s'" %
3543
                        (item, status))
3544

    
3545
    if self.op.command == constants.OOB_POWER_STATUS:
3546
      if not isinstance(result.payload, dict):
3547
        errs.append("power-status is expected to return a dict but got %s" %
3548
                    type(result.payload))
3549

    
3550
    if self.op.command in [
3551
        constants.OOB_POWER_ON,
3552
        constants.OOB_POWER_OFF,
3553
        constants.OOB_POWER_CYCLE,
3554
        ]:
3555
      if result.payload is not None:
3556
        errs.append("%s is expected to not return payload but got '%s'" %
3557
                    (self.op.command, result.payload))
3558

    
3559
    if errs:
3560
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3561
                               utils.CommaJoin(errs))
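    # Reference summary of the payload shapes validated above: OOB_HEALTH
    # returns a list of (item, status) pairs with status in
    # constants.OOB_STATUSES; OOB_POWER_STATUS returns a dict containing
    # constants.OOB_POWER_STATUS_POWERED; power on/off/cycle commands return
    # no payload at all.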
3562

    
3563
class _OsQuery(_QueryBase):
3564
  FIELDS = query.OS_FIELDS
3565

    
3566
  def ExpandNames(self, lu):
3567
    # Lock all nodes in shared mode
3568
    # Temporary removal of locks, should be reverted later
3569
    # TODO: reintroduce locks when they are lighter-weight
3570
    lu.needed_locks = {}
3571
    #self.share_locks[locking.LEVEL_NODE] = 1
3572
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3573

    
3574
    # The following variables interact with _QueryBase._GetNames
3575
    if self.names:
3576
      self.wanted = self.names
3577
    else:
3578
      self.wanted = locking.ALL_SET
3579

    
3580
    self.do_locking = self.use_locking
3581

    
3582
  def DeclareLocks(self, lu, level):
3583
    pass
3584

    
3585
  @staticmethod
3586
  def _DiagnoseByOS(rlist):
3587
    """Remaps a per-node return list into an a per-os per-node dictionary
3588

3589
    @param rlist: a map with node names as keys and OS objects as values
3590

3591
    @rtype: dict
3592
    @return: a dictionary with osnames as keys and as value another
3593
        map, with nodes as keys and tuples of (path, status, diagnose,
3594
        variants, parameters, api_versions) as values, eg::
3595

3596
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3597
                                     (/srv/..., False, "invalid api")],
3598
                           "node2": [(/srv/..., True, "", [], [])]}
3599
          }
3600

3601
    """
3602
    all_os = {}
3603
    # we build here the list of nodes that didn't fail the RPC (at RPC
3604
    # level), so that nodes with a non-responding node daemon don't
3605
    # make all OSes invalid
3606
    good_nodes = [node_name for node_name in rlist
3607
                  if not rlist[node_name].fail_msg]
3608
    for node_name, nr in rlist.items():
3609
      if nr.fail_msg or not nr.payload:
3610
        continue
3611
      for (name, path, status, diagnose, variants,
3612
           params, api_versions) in nr.payload:
3613
        if name not in all_os:
3614
          # build a list of nodes for this os containing empty lists
3615
          # for each node in node_list
3616
          all_os[name] = {}
3617
          for nname in good_nodes:
3618
            all_os[name][nname] = []
3619
        # convert params from [name, help] to (name, help)
3620
        params = [tuple(v) for v in params]
3621
        all_os[name][node_name].append((path, status, diagnose,
3622
                                        variants, params, api_versions))
3623
    return all_os
3624

    
3625
  def _GetQueryData(self, lu):
3626
    """Computes the list of nodes and their attributes.
3627

3628
    """
3629
    # Locking is not used
3630
    assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3631

    
3632
    valid_nodes = [node.name
3633
                   for node in lu.cfg.GetAllNodesInfo().values()
3634
                   if not node.offline and node.vm_capable]
3635
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3636
    cluster = lu.cfg.GetClusterInfo()
3637

    
3638
    data = {}
3639

    
3640
    for (os_name, os_data) in pol.items():
3641
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3642
                          hidden=(os_name in cluster.hidden_os),
3643
                          blacklisted=(os_name in cluster.blacklisted_os))
3644

    
3645
      variants = set()
3646
      parameters = set()
3647
      api_versions = set()
3648

    
3649
      for idx, osl in enumerate(os_data.values()):
3650
        info.valid = bool(info.valid and osl and osl[0][1])
3651
        if not info.valid:
3652
          break
3653

    
3654
        (node_variants, node_params, node_api) = osl[0][3:6]
3655
        if idx == 0:
3656
          # First entry
3657
          variants.update(node_variants)
3658
          parameters.update(node_params)
3659
          api_versions.update(node_api)
3660
        else:
3661
          # Filter out inconsistent values
3662
          variants.intersection_update(node_variants)
3663
          parameters.intersection_update(node_params)
3664
          api_versions.intersection_update(node_api)
3665

    
3666
      info.variants = list(variants)
3667
      info.parameters = list(parameters)
3668
      info.api_versions = list(api_versions)
3669

    
3670
      data[os_name] = info
3671

    
3672
    # Prepare data in requested order
3673
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3674
            if name in data]
3675

    
3676

    
3677
class LUOsDiagnose(NoHooksLU):
3678
  """Logical unit for OS diagnose/query.
3679

3680
  """
3681
  REQ_BGL = False
3682

    
3683
  @staticmethod
3684
  def _BuildFilter(fields, names):
3685
    """Builds a filter for querying OSes.
3686

3687
    """
3688
    name_filter = qlang.MakeSimpleFilter("name", names)
3689

    
3690
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3691
    # respective field is not requested
3692
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3693
                     for fname in ["hidden", "blacklisted"]
3694
                     if fname not in fields]
3695
    if "valid" not in fields:
3696
      status_filter.append([qlang.OP_TRUE, "valid"])
3697

    
3698
    if status_filter:
3699
      status_filter.insert(0, qlang.OP_AND)
3700
    else:
3701
      status_filter = None
3702

    
3703
    if name_filter and status_filter:
3704
      return [qlang.OP_AND, name_filter, status_filter]
3705
    elif name_filter:
3706
      return name_filter
3707
    else:
3708
      return status_filter
3709

    
3710
  def CheckArguments(self):
3711
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3712
                       self.op.output_fields, False)
3713

    
3714
  def ExpandNames(self):
3715
    self.oq.ExpandNames(self)
3716

    
3717
  def Exec(self, feedback_fn):
3718
    return self.oq.OldStyleQuery(self)
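  # Behavioural note (derived from _BuildFilter above): unless "hidden",
  # "blacklisted" or "valid" is explicitly requested as an output field, the
  # legacy query hides hidden, blacklisted and invalid OS definitions, so a
  # plain OS listing normally only shows usable OSes.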
3719

    
3720

    
3721
class LUNodeRemove(LogicalUnit):
3722
  """Logical unit for removing a node.
3723

3724
  """
3725
  HPATH = "node-remove"
3726
  HTYPE = constants.HTYPE_NODE
3727

    
3728
  def BuildHooksEnv(self):
3729
    """Build hooks env.
3730

3731
    This doesn't run on the target node in the pre phase as a failed
3732
    node would then be impossible to remove.
3733

3734
    """
3735
    return {
3736
      "OP_TARGET": self.op.node_name,
3737
      "NODE_NAME": self.op.node_name,
3738
      }
3739

    
3740
  def BuildHooksNodes(self):
3741
    """Build hooks nodes.
3742

3743
    """
3744
    all_nodes = self.cfg.GetNodeList()
3745
    try:
3746
      all_nodes.remove(self.op.node_name)
3747
    except ValueError:
3748
      logging.warning("Node '%s', which is about to be removed, was not found"
3749
                      " in the list of all nodes", self.op.node_name)
3750
    return (all_nodes, all_nodes)
3751

    
3752
  def CheckPrereq(self):
3753
    """Check prerequisites.
3754

3755
    This checks:
3756
     - the node exists in the configuration
3757
     - it does not have primary or secondary instances
3758
     - it's not the master
3759

3760
    Any errors are signaled by raising errors.OpPrereqError.
3761

3762
    """
3763
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3764
    node = self.cfg.GetNodeInfo(self.op.node_name)
3765
    assert node is not None
3766

    
3767
    instance_list = self.cfg.GetInstanceList()
3768

    
3769
    masternode = self.cfg.GetMasterNode()
3770
    if node.name == masternode:
3771
      raise errors.OpPrereqError("Node is the master node,"
3772
                                 " you need to failover first.",
3773
                                 errors.ECODE_INVAL)
3774

    
3775
    for instance_name in instance_list:
3776
      instance = self.cfg.GetInstanceInfo(instance_name)
3777
      if node.name in instance.all_nodes:
3778
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3779
                                   " please remove first." % instance_name,
3780
                                   errors.ECODE_INVAL)
3781
    self.op.node_name = node.name
3782
    self.node = node
3783

    
3784
  def Exec(self, feedback_fn):
3785
    """Removes the node from the cluster.
3786

3787
    """
3788
    node = self.node
3789
    logging.info("Stopping the node daemon and removing configs from node %s",
3790
                 node.name)
3791

    
3792
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3793

    
3794
    # Promote nodes to master candidate as needed
3795
    _AdjustCandidatePool(self, exceptions=[node.name])
3796
    self.context.RemoveNode(node.name)
3797

    
3798
    # Run post hooks on the node before it's removed
3799
    _RunPostHook(self, node.name)
3800

    
3801
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3802
    msg = result.fail_msg
3803
    if msg:
3804
      self.LogWarning("Errors encountered on the remote node while leaving"
3805
                      " the cluster: %s", msg)
3806

    
3807
    # Remove node from our /etc/hosts
3808
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3809
      master_node = self.cfg.GetMasterNode()
3810
      result = self.rpc.call_etc_hosts_modify(master_node,
3811
                                              constants.ETC_HOSTS_REMOVE,
3812
                                              node.name, None)
3813
      result.Raise("Can't update hosts file with new host data")
3814
      _RedistributeAncillaryFiles(self)
3815

    
3816

    
3817
class _NodeQuery(_QueryBase):
3818
  FIELDS = query.NODE_FIELDS
3819

    
3820
  def ExpandNames(self, lu):
3821
    lu.needed_locks = {}
3822
    lu.share_locks[locking.LEVEL_NODE] = 1
3823

    
3824
    if self.names:
3825
      self.wanted = _GetWantedNodes(lu, self.names)
3826
    else:
3827
      self.wanted = locking.ALL_SET
3828

    
3829
    self.do_locking = (self.use_locking and
3830
                       query.NQ_LIVE in self.requested_data)
3831

    
3832
    if self.do_locking:
3833
      # if we don't request only static fields, we need to lock the nodes
3834
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3835

    
3836
  def DeclareLocks(self, lu, level):
3837
    pass
3838

    
3839
  def _GetQueryData(self, lu):
3840
    """Computes the list of nodes and their attributes.
3841

3842
    """
3843
    all_info = lu.cfg.GetAllNodesInfo()
3844

    
3845
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3846

    
3847
    # Gather data as requested
3848
    if query.NQ_LIVE in self.requested_data:
3849
      # filter out non-vm_capable nodes
3850
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3851

    
3852
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3853
                                        lu.cfg.GetHypervisorType())
3854
      live_data = dict((name, nresult.payload)
3855
                       for (name, nresult) in node_data.items()
3856
                       if not nresult.fail_msg and nresult.payload)
3857
    else:
3858
      live_data = None
3859

    
3860
    if query.NQ_INST in self.requested_data:
3861
      node_to_primary = dict([(name, set()) for name in nodenames])
3862
      node_to_secondary = dict([(name, set()) for name in nodenames])
3863

    
3864
      inst_data = lu.cfg.GetAllInstancesInfo()
3865

    
3866
      for inst in inst_data.values():
3867
        if inst.primary_node in node_to_primary:
3868
          node_to_primary[inst.primary_node].add(inst.name)
3869
        for secnode in inst.secondary_nodes:
3870
          if secnode in node_to_secondary:
3871
            node_to_secondary[secnode].add(inst.name)
3872
    else:
3873
      node_to_primary = None
3874
      node_to_secondary = None
3875

    
3876
    if query.NQ_OOB in self.requested_data:
3877
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3878
                         for name, node in all_info.iteritems())
3879
    else:
3880
      oob_support = None
3881

    
3882
    if query.NQ_GROUP in self.requested_data:
3883
      groups = lu.cfg.GetAllNodeGroupsInfo()
3884
    else:
3885
      groups = {}
3886

    
3887
    return query.NodeQueryData([all_info[name] for name in nodenames],
3888
                               live_data, lu.cfg.GetMasterNode(),
3889
                               node_to_primary, node_to_secondary, groups,
3890
                               oob_support, lu.cfg.GetClusterInfo())
3891

    
3892

    
3893
class LUNodeQuery(NoHooksLU):
3894
  """Logical unit for querying nodes.
3895

3896
  """
3897
  # pylint: disable-msg=W0142
3898
  REQ_BGL = False
3899

    
3900
  def CheckArguments(self):
3901
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3902
                         self.op.output_fields, self.op.use_locking)
3903

    
3904
  def ExpandNames(self):
3905
    self.nq.ExpandNames(self)
3906

    
3907
  def Exec(self, feedback_fn):
3908
    return self.nq.OldStyleQuery(self)
3909

    
3910

    
3911
class LUNodeQueryvols(NoHooksLU):
3912
  """Logical unit for getting volumes on node(s).
3913

3914
  """
3915
  REQ_BGL = False
3916
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3917
  _FIELDS_STATIC = utils.FieldSet("node")
3918

    
3919
  def CheckArguments(self):
3920
    _CheckOutputFields(static=self._FIELDS_STATIC,
3921
                       dynamic=self._FIELDS_DYNAMIC,
3922
                       selected=self.op.output_fields)
3923

    
3924
  def ExpandNames(self):
3925
    self.needed_locks = {}
3926
    self.share_locks[locking.LEVEL_NODE] = 1
3927
    if not self.op.nodes:
3928
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3929
    else:
3930
      self.needed_locks[locking.LEVEL_NODE] = \
3931
        _GetWantedNodes(self, self.op.nodes)
3932

    
3933
  def Exec(self, feedback_fn):
3934
    """Computes the list of nodes and their attributes.
3935

3936
    """
3937
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3938
    volumes = self.rpc.call_node_volumes(nodenames)
3939

    
3940
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3941
             in self.cfg.GetInstanceList()]
3942

    
3943
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3944

    
3945
    output = []
3946
    for node in nodenames:
3947
      nresult = volumes[node]
3948
      if nresult.offline:
3949
        continue
3950
      msg = nresult.fail_msg
3951
      if msg:
3952
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3953
        continue
3954

    
3955
      node_vols = nresult.payload[:]
3956
      node_vols.sort(key=lambda vol: vol['dev'])
3957

    
3958
      for vol in node_vols:
3959
        node_output = []
3960
        for field in self.op.output_fields:
3961
          if field == "node":
3962
            val = node
3963
          elif field == "phys":
3964
            val = vol['dev']
3965
          elif field == "vg":
3966
            val = vol['vg']
3967
          elif field == "name":
3968
            val = vol['name']
3969
          elif field == "size":
3970
            val = int(float(vol['size']))
3971
          elif field == "instance":
3972
            for inst in ilist:
3973
              if node not in lv_by_node[inst]:
3974
                continue
3975
              if vol['name'] in lv_by_node[inst][node]:
3976
                val = inst.name
3977
                break
3978
            else:
3979
              val = '-'
3980
          else:
3981
            raise errors.ParameterError(field)
3982
          node_output.append(str(val))
3983

    
3984
        output.append(node_output)
3985

    
3986
    return output
3987

    
3988

    
3989
class LUNodeQueryStorage(NoHooksLU):
3990
  """Logical unit for getting information on storage units on node(s).
3991

3992
  """
3993
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3994
  REQ_BGL = False
3995

    
3996
  def CheckArguments(self):
3997
    _CheckOutputFields(static=self._FIELDS_STATIC,
3998
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3999
                       selected=self.op.output_fields)
4000

    
4001
  def ExpandNames(self):
4002
    self.needed_locks = {}
4003
    self.share_locks[locking.LEVEL_NODE] = 1
4004

    
4005
    if self.op.nodes:
4006
      self.needed_locks[locking.LEVEL_NODE] = \
4007
        _GetWantedNodes(self, self.op.nodes)
4008
    else:
4009
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4010

    
4011
  def Exec(self, feedback_fn):
4012
    """Computes the list of nodes and their attributes.
4013

4014
    """
4015
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4016

    
4017
    # Always get name to sort by
4018
    if constants.SF_NAME in self.op.output_fields:
4019
      fields = self.op.output_fields[:]
4020
    else:
4021
      fields = [constants.SF_NAME] + self.op.output_fields
4022

    
4023
    # Never ask for node or type as it's only known to the LU
4024
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4025
      while extra in fields:
4026
        fields.remove(extra)
4027

    
4028
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4029
    name_idx = field_idx[constants.SF_NAME]
4030

    
4031
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4032
    data = self.rpc.call_storage_list(self.nodes,
4033
                                      self.op.storage_type, st_args,
4034
                                      self.op.name, fields)
4035

    
4036
    result = []
4037

    
4038
    for node in utils.NiceSort(self.nodes):
4039
      nresult = data[node]
4040
      if nresult.offline:
4041
        continue
4042

    
4043
      msg = nresult.fail_msg
4044
      if msg:
4045
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4046
        continue
4047

    
4048
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4049

    
4050
      for name in utils.NiceSort(rows.keys()):
4051
        row = rows[name]
4052

    
4053
        out = []
4054

    
4055
        for field in self.op.output_fields:
4056
          if field == constants.SF_NODE:
4057
            val = node
4058
          elif field == constants.SF_TYPE:
4059
            val = self.op.storage_type
4060
          elif field in field_idx:
4061
            val = row[field_idx[field]]
4062
          else:
4063
            raise errors.ParameterError(field)
4064

    
4065
          out.append(val)
4066

    
4067
        result.append(out)
4068

    
4069
    return result
4070

    
4071

    
4072
class _InstanceQuery(_QueryBase):
4073
  FIELDS = query.INSTANCE_FIELDS
4074

    
4075
  def ExpandNames(self, lu):
4076
    lu.needed_locks = {}
4077
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4078
    lu.share_locks[locking.LEVEL_NODE] = 1
4079

    
4080
    if self.names:
4081
      self.wanted = _GetWantedInstances(lu, self.names)
4082
    else:
4083
      self.wanted = locking.ALL_SET
4084

    
4085
    self.do_locking = (self.use_locking and
4086
                       query.IQ_LIVE in self.requested_data)
4087
    if self.do_locking:
4088
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4089
      lu.needed_locks[locking.LEVEL_NODE] = []
4090
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4091

    
4092
  def DeclareLocks(self, lu, level):
4093
    if level == locking.LEVEL_NODE and self.do_locking:
4094
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4095

    
4096
  def _GetQueryData(self, lu):
4097
    """Computes the list of instances and their attributes.
4098

4099
    """
4100
    cluster = lu.cfg.GetClusterInfo()
4101
    all_info = lu.cfg.GetAllInstancesInfo()
4102

    
4103
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4104

    
4105
    instance_list = [all_info[name] for name in instance_names]
4106
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4107
                                        for inst in instance_list)))
4108
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4109
    bad_nodes = []
4110
    offline_nodes = []
4111
    wrongnode_inst = set()
4112

    
4113
    # Gather data as requested
4114
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4115
      live_data = {}
4116
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4117
      for name in nodes:
4118
        result = node_data[name]
4119
        if result.offline:
4120
          # offline nodes will be in both lists
4121
          assert result.fail_msg
4122
          offline_nodes.append(name)
4123
        if result.fail_msg:
4124
          bad_nodes.append(name)
4125
        elif result.payload:
4126
          for inst in result.payload:
4127
            if all_info[inst].primary_node == name:
4128
              live_data.update(result.payload)
4129
            else:
4130
              wrongnode_inst.add(inst)
4131
        # else no instance is alive
4132
    else:
4133
      live_data = {}
4134

    
4135
    if query.IQ_DISKUSAGE in self.requested_data:
4136
      disk_usage = dict((inst.name,
4137
                         _ComputeDiskSize(inst.disk_template,
4138
                                          [{constants.IDISK_SIZE: disk.size}
4139
                                           for disk in inst.disks]))
4140
                        for inst in instance_list)
4141
    else:
4142
      disk_usage = None
4143

    
4144
    if query.IQ_CONSOLE in self.requested_data:
4145
      consinfo = {}
4146
      for inst in instance_list:
4147
        if inst.name in live_data:
4148
          # Instance is running
4149
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4150
        else:
4151
          consinfo[inst.name] = None
4152
      assert set(consinfo.keys()) == set(instance_names)
4153
    else:
4154
      consinfo = None
4155

    
4156
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4157
                                   disk_usage, offline_nodes, bad_nodes,
4158
                                   live_data, wrongnode_inst, consinfo)
4159

    
4160

    
4161
class LUQuery(NoHooksLU):
4162
  """Query for resources/items of a certain kind.
4163

4164
  """
4165
  # pylint: disable-msg=W0142
4166
  REQ_BGL = False
4167

    
4168
  def CheckArguments(self):
4169
    qcls = _GetQueryImplementation(self.op.what)
4170

    
4171
    self.impl = qcls(self.op.filter, self.op.fields, False)
4172

    
4173
  def ExpandNames(self):
4174
    self.impl.ExpandNames(self)
4175

    
4176
  def DeclareLocks(self, level):
4177
    self.impl.DeclareLocks(self, level)
4178

    
4179
  def Exec(self, feedback_fn):
4180
    return self.impl.NewStyleQuery(self)
4181

    
4182

    
4183
class LUQueryFields(NoHooksLU):
4184
  """Query for resources/items of a certain kind.
4185

4186
  """
4187
  # pylint: disable-msg=W0142
4188
  REQ_BGL = False
4189

    
4190
  def CheckArguments(self):
4191
    self.qcls = _GetQueryImplementation(self.op.what)
4192

    
4193
  def ExpandNames(self):
4194
    self.needed_locks = {}
4195

    
4196
  def Exec(self, feedback_fn):
4197
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4198

    
4199

    
4200
class LUNodeModifyStorage(NoHooksLU):
4201
  """Logical unit for modifying a storage volume on a node.
4202

4203
  """
4204
  REQ_BGL = False
4205

    
4206
  def CheckArguments(self):
4207
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4208

    
4209
    storage_type = self.op.storage_type
4210

    
4211
    try:
4212
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4213
    except KeyError:
4214
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4215
                                 " modified" % storage_type,
4216
                                 errors.ECODE_INVAL)
4217

    
4218
    diff = set(self.op.changes.keys()) - modifiable
4219
    if diff:
4220
      raise errors.OpPrereqError("The following fields can not be modified for"
4221
                                 " storage units of type '%s': %r" %
4222
                                 (storage_type, list(diff)),
4223
                                 errors.ECODE_INVAL)
4224

    
4225
  def ExpandNames(self):
4226
    self.needed_locks = {
4227
      locking.LEVEL_NODE: self.op.node_name,
4228
      }
4229

    
4230
  def Exec(self, feedback_fn):
4231
    """Computes the list of nodes and their attributes.
4232

4233
    """
4234
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4235
    result = self.rpc.call_storage_modify(self.op.node_name,
4236
                                          self.op.storage_type, st_args,
4237
                                          self.op.name, self.op.changes)
4238
    result.Raise("Failed to modify storage unit '%s' on %s" %
4239
                 (self.op.name, self.op.node_name))
4240

    
4241

    
4242
class LUNodeAdd(LogicalUnit):
4243
  """Logical unit for adding node to the cluster.
4244

4245
  """
4246
  HPATH = "node-add"
4247
  HTYPE = constants.HTYPE_NODE
4248
  _NFLAGS = ["master_capable", "vm_capable"]
4249

    
4250
  def CheckArguments(self):
4251
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4252
    # validate/normalize the node name
4253
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4254
                                         family=self.primary_ip_family)
4255
    self.op.node_name = self.hostname.name
4256
    if self.op.readd and self.op.group:
4257
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4258
                                 " being readded", errors.ECODE_INVAL)
4259

    
4260
  def BuildHooksEnv(self):
4261
    """Build hooks env.
4262

4263
    This will run on all nodes before, and on all nodes + the new node after.
4264

4265
    """
4266
    return {
4267
      "OP_TARGET": self.op.node_name,
4268
      "NODE_NAME": self.op.node_name,
4269
      "NODE_PIP": self.op.primary_ip,
4270
      "NODE_SIP": self.op.secondary_ip,
4271
      "MASTER_CAPABLE": str(self.op.master_capable),
4272
      "VM_CAPABLE": str(self.op.vm_capable),
4273
      }
4274

    
4275
  def BuildHooksNodes(self):
4276
    """Build hooks nodes.
4277

4278
    """
4279
    # Exclude added node
4280
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4281
    post_nodes = pre_nodes + [self.op.node_name, ]
4282

    
4283
    return (pre_nodes, post_nodes)
4284

    
4285
  def CheckPrereq(self):
4286
    """Check prerequisites.
4287

4288
    This checks:
4289
     - the new node is not already in the config
4290
     - it is resolvable
4291
     - its parameters (single/dual homed) matches the cluster
4292

4293
    Any errors are signaled by raising errors.OpPrereqError.
4294

4295
    """
4296
    cfg = self.cfg
4297
    hostname = self.hostname
4298
    node = hostname.name
4299
    primary_ip = self.op.primary_ip = hostname.ip
4300
    if self.op.secondary_ip is None:
4301
      if self.primary_ip_family == netutils.IP6Address.family:
4302
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4303
                                   " IPv4 address must be given as secondary",
4304
                                   errors.ECODE_INVAL)
4305
      self.op.secondary_ip = primary_ip
4306

    
4307
    secondary_ip = self.op.secondary_ip
4308
    if not netutils.IP4Address.IsValid(secondary_ip):
4309
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4310
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4311

    
4312
    node_list = cfg.GetNodeList()
4313
    if not self.op.readd and node in node_list:
4314
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4315
                                 node, errors.ECODE_EXISTS)
4316
    elif self.op.readd and node not in node_list:
4317
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4318
                                 errors.ECODE_NOENT)
4319

    
4320
    self.changed_primary_ip = False
4321

    
4322
    for existing_node_name in node_list:
4323
      existing_node = cfg.GetNodeInfo(existing_node_name)
4324

    
4325
      if self.op.readd and node == existing_node_name:
4326
        if existing_node.secondary_ip != secondary_ip:
4327
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4328
                                     " address configuration as before",
4329
                                     errors.ECODE_INVAL)
4330
        if existing_node.primary_ip != primary_ip:
4331
          self.changed_primary_ip = True
4332

    
4333
        continue
4334

    
4335
      if (existing_node.primary_ip == primary_ip or
4336
          existing_node.secondary_ip == primary_ip or
4337
          existing_node.primary_ip == secondary_ip or
4338
          existing_node.secondary_ip == secondary_ip):
4339
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4340
                                   " existing node %s" % existing_node.name,
4341
                                   errors.ECODE_NOTUNIQUE)
4342

    
4343
    # After this 'if' block, None is no longer a valid value for the
4344
    # _capable op attributes
4345
    if self.op.readd:
4346
      old_node = self.cfg.GetNodeInfo(node)
4347
      assert old_node is not None, "Can't retrieve locked node %s" % node
4348
      for attr in self._NFLAGS:
4349
        if getattr(self.op, attr) is None:
4350
          setattr(self.op, attr, getattr(old_node, attr))
4351
    else:
4352
      for attr in self._NFLAGS:
4353
        if getattr(self.op, attr) is None:
4354
          setattr(self.op, attr, True)
4355

    
4356
    if self.op.readd and not self.op.vm_capable:
4357
      pri, sec = cfg.GetNodeInstances(node)
4358
      if pri or sec:
4359
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4360
                                   " flag set to false, but it already holds"
4361
                                   " instances" % node,
4362
                                   errors.ECODE_STATE)
4363

    
4364
    # check that the type of the node (single versus dual homed) is the
4365
    # same as for the master
4366
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4367
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4368
    newbie_singlehomed = secondary_ip == primary_ip
4369
    if master_singlehomed != newbie_singlehomed:
4370
      if master_singlehomed:
4371
        raise errors.OpPrereqError("The master has no secondary ip but the"
4372
                                   " new node has one",
4373
                                   errors.ECODE_INVAL)
4374
      else:
4375
        raise errors.OpPrereqError("The master has a secondary ip but the"
4376
                                   " new node doesn't have one",
4377
                                   errors.ECODE_INVAL)
4378

    
4379
    # checks reachability
4380
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4381
      raise errors.OpPrereqError("Node not reachable by ping",
4382
                                 errors.ECODE_ENVIRON)
4383

    
4384
    if not newbie_singlehomed:
4385
      # check reachability from my secondary ip to newbie's secondary ip
4386
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4387
                           source=myself.secondary_ip):
4388
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4389
                                   " based ping to node daemon port",
4390
                                   errors.ECODE_ENVIRON)
4391

    
4392
    if self.op.readd:
4393
      exceptions = [node]
4394
    else:
4395
      exceptions = []
4396

    
4397
    if self.op.master_capable:
4398
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4399
    else:
4400
      self.master_candidate = False
4401

    
4402
    if self.op.readd:
4403
      self.new_node = old_node
4404
    else:
4405
      node_group = cfg.LookupNodeGroup(self.op.group)
4406
      self.new_node = objects.Node(name=node,
4407
                                   primary_ip=primary_ip,
4408
                                   secondary_ip=secondary_ip,
4409
                                   master_candidate=self.master_candidate,
4410
                                   offline=False, drained=False,
4411
                                   group=node_group)
4412

    
4413
    if self.op.ndparams:
4414
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4415

    
4416
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We're adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


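# Illustrative sketch only: for a hypothetical node "node4.example.com", the
# hooks environment returned by LUNodeAdd.BuildHooksEnv above would look
# roughly like
#   {"OP_TARGET": "node4.example.com", "NODE_NAME": "node4.example.com",
#    "NODE_PIP": "192.0.2.14", "NODE_SIP": "198.51.100.14",
#    "MASTER_CAPABLE": "True", "VM_CAPABLE": "True"}
# (the hostname and addresses are invented for the example).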
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

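  # The _F2R table above maps the (master_candidate, drained, offline) flag
  # tuple to a single role constant; _R2F is simply the inverse mapping.
  # For example, a node whose flags are (True, False, False) has role
  # _ROLE_CANDIDATE, and _R2F[_ROLE_OFFLINE] gives back (False, False, True).
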
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested flag and parameter changes against the
    node's current state.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

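  # To illustrate the role computation in CheckPrereq above: requesting
  # drained=True on a node that is currently a master candidate yields
  # new_role == _ROLE_DRAINED, while requesting offline=False on an offline
  # node un-sets the only True flag and yields _ROLE_REGULAR (after the
  # node has been reached via the call_version RPC check).
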
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


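# Illustrative sketch only: for LUClusterConfigQuery above, an opcode with
# output_fields=["cluster_name", "master_node"] would return a list such as
# ["cluster.example.com", "node1.example.com"] (the values are hypothetical);
# the entries come back in the same order as the requested fields.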
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


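# Illustrative sketch only: on success _AssembleInstanceDisks above returns
# (True, device_info), where device_info holds one tuple per disk, e.g.
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# for a single-disk DRBD instance; the node name and device path here are
# hypothetical. On non-ignored failures the first element is False.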
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


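# Summary of the error handling in _ShutdownInstanceDisks above: a failed
# shutdown on the primary node makes the function return False unless
# ignore_primary is set, while a failure on another node is only counted
# when that node is not marked offline in the RPC result.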
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


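# Illustrative sketch only: if _CheckNodeFreeMemory above is called with
# requested=2048 (MiB) and the node reports memory_free=1024, the
# "requested > free_mem" branch raises OpPrereqError with ECODE_NORES;
# a non-integer memory_free (e.g. None) is reported as ECODE_ENVIRON instead.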
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


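# Illustrative sketch only: req_sizes for _CheckNodesFreeDiskPerVG above is a
# mapping from volume group name to the space needed in that group, e.g.
#   {"xenvg": 10240}
# would check that every node in nodenames has 10 GiB free in the
# (hypothetical) volume group "xenvg".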
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


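# Note on the hvparams handling in LUInstanceStartup.CheckPrereq above: the
# cluster/instance defaults are expanded first via cluster.FillHV(instance)
# and the per-opcode overrides are applied on top with dict.update(), so a
# value passed in self.op.hvparams always wins over the defaults for this
# single start operation.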
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


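# Summary of LUInstanceReboot.Exec above: soft and hard reboots of a running
# instance are delegated to the node daemon via call_instance_reboot, while a
# full reboot shuts the instance down, cycles its disks and starts it again
# via call_instance_start (an already stopped instance is simply started).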
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


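# Helper shared by the instance-removal paths below: it removes the block
# devices, drops the instance from the cluster configuration and schedules
# the instance lock for removal.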
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": self.op.target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return (nl, nl + [self.instance.primary_node])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self, "iallocator", "target_node")
      if self.op.iallocator:
        self._RunAllocator()
        # Release all unnecessary node locks
        nodes_keep = [instance.primary_node, self.op.target_node]
        nodes_rel = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                     if node not in nodes_keep]
        self.context.glm.release(locking.LEVEL_NODE, nodes_rel)
        self.acquired_locks[locking.LEVEL_NODE] = nodes_keep

      # self.op.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.op.target_node

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]

      if self.op.iallocator or (self.op.target_node and
                                self.op.target_node != target_node):
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be failed over to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   instance.disk_template, errors.ECODE_INVAL)
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    # Save target_node so that we can use it in BuildHooksEnv
    self.op.target_node = target_node

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.op.target_node

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency or primary_node.offline:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance.name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.target_node = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance.name, self.op.iallocator,
                 utils.CommaJoin(ial.result))


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.cleanup, self.op.iallocator,
                                       self.op.target_node)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self._migrater.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    self.lock_all_nodes = False

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False,
                                        self.op.iallocator, None))

      if inst.disk_template in constants.DTS_EXT_MIRROR:
        # We need to lock all nodes, as the iallocator will choose the
        # destination nodes afterwards
        self.lock_all_nodes = True

    self.tasklets = tasklets

    # Declare node locks
    if self.lock_all_nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)


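# The tasklet below is used both by LUInstanceMigrate (a single instance) and
# by LUNodeMigrate, which creates one such tasklet per primary instance of the
# node being evacuated.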
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup,
               iallocator=None, target_node=None):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.iallocator = iallocator
    self.target_node = target_node

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " migrations" % instance.disk_template,
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.iallocator:
        self._RunAllocator()

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node

      if len(self.lu.tasklets) == 1:
        # It is safe to remove locks only when we're the only tasklet in the LU
        nodes_keep = [instance.primary_node, self.target_node]
        nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
                     if node not in nodes_keep]
        self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
        self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be migrated over to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   instance.disk_template, errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)


  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance_name, self.iallocator,
                 utils.CommaJoin(ial.result))

    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters are accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(self.instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


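# Each DRBD8-based disk generated below is backed by two LVs per node: the
# data volume of the requested size and a small (128 MB) metadata volume; the
# DRBD device itself is identified by the tuple (primary, secondary, port,
# primary minor, secondary minor, shared secret).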
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
7002
                         p_minor, s_minor):
7003
  """Generate a drbd8 device complete with its children.
7004

7005
  """
7006
  port = lu.cfg.AllocatePort()
7007
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7008
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7009
                          logical_id=(vgname, names[0]))
7010
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7011
                          logical_id=(vgname, names[1]))
7012
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7013
                          logical_id=(primary, secondary, port,
7014
                                      p_minor, s_minor,
7015
                                      shared_secret),
7016
                          children=[dev_data, dev_meta],
7017
                          iv_name=iv_name)
7018
  return drbd_dev
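
# Sketch of the object tree built above (illustrative only, not executed):
#
#   LD_DRBD8  size=size, iv_name=iv_name
#   +-- LD_LV names[0]   data volume, size=size
#   +-- LD_LV names[1]   metadata volume, fixed at 128 MB
#
# The DRBD logical_id bundles the two node names, the allocated port, both
# minors and the generated shared secret.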


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE], vg,
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
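
# Worked example (assumed numbers): _CalcEta(10.0, 256, 1024) computes
# avg_time = 10.0 / 256 and returns (1024 - 256) * avg_time == 30.0, i.e. the
# remaining 768 units are expected to take roughly another 30 seconds at the
# observed rate.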


def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s",
                   idx, instance.name, node)

      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
                      " look at the status and troubleshoot the issue.", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)
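
# Illustrative example (the concrete values of MAX_WIPE_CHUNK and
# MIN_WIPE_CHUNK_PERCENT live in constants.py and are only assumed here): for a
# 2048 MB disk and a 10% minimum chunk percentage, device.size / 100.0 * 10
# gives 204.8, so each call_blockdev_wipe request covers roughly 205 MB unless
# MAX_WIPE_CHUNK caps it lower; progress is logged at most once per minute via
# _CalcEta.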


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
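
# Note (illustrative, not from the original source): in the loop above f_create
# is True only on the chosen primary node, so that node gets both force_create
# and force_open set, while the other nodes rely on device.CreateOnSecondary()
# inside _CreateBlockDev to decide which pieces to build.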


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
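
# Worked example (hypothetical input): for
#   disks = [{IDISK_VG: "xenvg", IDISK_SIZE: 1024},
#            {IDISK_VG: "xenvg", IDISK_SIZE: 512},
#            {IDISK_VG: "fastvg", IDISK_SIZE: 256}]
# the DT_PLAIN entry is {"xenvg": 1536, "fastvg": 256}, while DT_DRBD8 adds
# the 128 MB metadata overhead per disk: {"xenvg": 1792, "fastvg": 384}.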


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
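
# Worked example (hypothetical input): for two disks of 1024 MB and 512 MB,
# DT_PLAIN yields 1536, DT_DRBD8 yields 1536 + 2 * 128 == 1792, and the
# file, shared-file and block templates need no LVM space at all.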


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
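
  # Note (illustrative): ial.result is an ordered node name list, so result[0]
  # becomes the primary node and, for two-node allocations where
  # required_nodes == 2 (e.g. mirrored disk templates), result[1] becomes the
  # secondary.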

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
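
  # Example sketch (hypothetical export contents): if the INISECT_INS section
  # carries disk_count=2, disk0_size=1024 and disk1_size=512 and the opcode did
  # not specify any disks, the loop above fills
  # self.op.disks = [{IDISK_SIZE: 1024}, {IDISK_SIZE: 512}]; values given
  # explicitly in the opcode always take precedence over the export.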

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
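
  # Example (illustrative, hypothetical parameter): with identify_defaults set,
  # a request carrying a beparam that equals the cluster-wide default is
  # dropped here, the apparent intent being that the new instance then keeps
  # tracking the cluster default instead of pinning its own copy of the same
  # value.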

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have fewer disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating the mac address here, both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
8177

    
8178
  def Exec(self, feedback_fn):
8179
    """Create and add the instance to the cluster.
8180

8181
    """
8182
    instance = self.op.instance_name
8183
    pnode_name = self.pnode.name
8184

    
8185
    ht_kind = self.op.hypervisor
8186
    if ht_kind in constants.HTS_REQ_PORT:
8187
      network_port = self.cfg.AllocatePort()
8188
    else:
8189
      network_port = None
8190

    
8191
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8192
      # this is needed because os.path.join does not accept None arguments
8193
      if self.op.file_storage_dir is None:
8194
        string_file_storage_dir = ""
8195
      else:
8196
        string_file_storage_dir = self.op.file_storage_dir
8197

    
8198
      # build the full file storage dir path
8199
      if self.op.disk_template == constants.DT_SHARED_FILE:
8200
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8201
      else:
8202
        get_fsd_fn = self.cfg.GetFileStorageDir
8203

    
8204
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
8205
                                        string_file_storage_dir, instance)
8206
    else:
8207
      file_storage_dir = ""
8208

    
8209
    disks = _GenerateDiskTemplate(self,
8210
                                  self.op.disk_template,
8211
                                  instance, pnode_name,
8212
                                  self.secondaries,
8213
                                  self.disks,
8214
                                  file_storage_dir,
8215
                                  self.op.file_driver,
8216
                                  0,
8217
                                  feedback_fn)
8218

    
8219
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8220
                            primary_node=pnode_name,
8221
                            nics=self.nics, disks=disks,
8222
                            disk_template=self.op.disk_template,
8223
                            admin_up=False,
8224
                            network_port=network_port,
8225
                            beparams=self.op.beparams,
8226
                            hvparams=self.op.hvparams,
8227
                            hypervisor=self.op.hypervisor,
8228
                            osparams=self.op.osparams,
8229
                            )
8230

    
8231
    if self.adopt_disks:
8232
      if self.op.disk_template == constants.DT_PLAIN:
8233
        # rename LVs to the newly-generated names; we need to construct
8234
        # 'fake' LV disks with the old data, plus the new unique_id
8235
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8236
        rename_to = []
8237
        for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8238
          rename_to.append(t_dsk.logical_id)
8239
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8240
          self.cfg.SetDiskID(t_dsk, pnode_name)
8241
        result = self.rpc.call_blockdev_rename(pnode_name,
8242
                                               zip(tmp_disks, rename_to))
8243
        result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
        feedback_fn("* wiping instance disks...")
        try:
          _WipeDisks(self, iobj)
        except errors.OpExecError:
          self.LogWarning("Device wiping failed, reverting...")
          try:
            _RemoveDisks(self, iobj)
          finally:
            self.cfg.ReleaseDRBDMinors(instance)
            raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
8273
    # added the instance to the config
8274
    del self.remove_locks[locking.LEVEL_INSTANCE]
8275
    # Unlock all the nodes
8276
    if self.op.mode == constants.INSTANCE_IMPORT:
8277
      nodes_keep = [self.op.src_node]
8278
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8279
                       if node != self.op.src_node]
8280
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8281
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8282
    else:
8283
      self.context.glm.release(locking.LEVEL_NODE)
8284
      del self.acquired_locks[locking.LEVEL_NODE]
8285

    
8286
    if self.op.wait_for_sync:
8287
      disk_abort = not _WaitForSync(self, iobj)
8288
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8289
      # make sure the disks are not degraded (still sync-ing is ok)
8290
      time.sleep(15)
8291
      feedback_fn("* checking mirrors status")
8292
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8293
    else:
8294
      disk_abort = False
8295

    
8296
    if disk_abort:
8297
      _RemoveDisks(self, iobj)
8298
      self.cfg.RemoveInstance(iobj.name)
8299
      # Make sure the instance lock gets removed
8300
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8301
      raise errors.OpExecError("There are some degraded disks for"
8302
                               " this instance")
8303

    
8304
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8305
      if self.op.mode == constants.INSTANCE_CREATE:
8306
        if not self.op.no_install:
8307
          feedback_fn("* running the instance OS create scripts...")
8308
          # FIXME: pass debug option from opcode to backend
8309
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8310
                                                 self.op.debug_level)
8311
          result.Raise("Could not add os for instance %s"
8312
                       " on node %s" % (instance, pnode_name))
8313

    
8314
      elif self.op.mode == constants.INSTANCE_IMPORT:
8315
        feedback_fn("* running the instance OS import scripts...")
8316

    
8317
        transfers = []
8318

    
8319
        for idx, image in enumerate(self.src_images):
8320
          if not image:
8321
            continue
8322

    
8323
          # FIXME: pass debug option from opcode to backend
8324
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8325
                                             constants.IEIO_FILE, (image, ),
8326
                                             constants.IEIO_SCRIPT,
8327
                                             (iobj.disks[idx], idx),
8328
                                             None)
8329
          transfers.append(dt)
8330

    
8331
        import_result = \
8332
          masterd.instance.TransferInstanceData(self, feedback_fn,
8333
                                                self.op.src_node, pnode_name,
8334
                                                self.pnode.secondary_ip,
8335
                                                iobj, transfers)
8336
        if not compat.all(import_result):
8337
          self.LogWarning("Some disks for instance %s on node %s were not"
8338
                          " imported successfully" % (instance, pnode_name))
8339

    
8340
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8341
        feedback_fn("* preparing remote import...")
8342
        # The source cluster will stop the instance before attempting to make a
8343
        # connection. In some cases stopping an instance can take a long time,
8344
        # hence the shutdown timeout is added to the connection timeout.
8345
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8346
                           self.op.source_shutdown_timeout)
8347
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8348

    
8349
        assert iobj.primary_node == self.pnode.name
8350
        disk_results = \
8351
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8352
                                        self.source_x509_ca,
8353
                                        self._cds, timeouts)
8354
        if not compat.all(disk_results):
8355
          # TODO: Should the instance still be started, even if some disks
8356
          # failed to import (valid for local imports, too)?
8357
          self.LogWarning("Some disks for instance %s on node %s were not"
8358
                          " imported successfully" % (instance, pnode_name))
8359

    
8360
        # Run rename script on newly imported instance
8361
        assert iobj.name == instance
8362
        feedback_fn("Running rename script for %s" % instance)
8363
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8364
                                                   self.source_instance_name,
8365
                                                   self.op.debug_level)
8366
        if result.fail_msg:
8367
          self.LogWarning("Failed to run rename script for %s on node"
8368
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8369

    
8370
      else:
8371
        # also checked in the prereq part
8372
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8373
                                     % self.op.mode)
8374

    
8375
    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")
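      # Note (assumption, not verified against the RPC layer): the two None
      # arguments to call_instance_start above stand for optional hypervisor
      # and backend parameter overrides for this particular start; None means
      # the instance's configured parameters are used.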

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
8387
  """Connect to an instance's console.
8388

8389
  This is somewhat special in that it returns the command line that
8390
  you need to run on the master node in order to connect to the
8391
  console.
8392

8393
  """
8394
  REQ_BGL = False
8395

    
8396
  def ExpandNames(self):
8397
    self._ExpandAndLockInstance()
8398

    
8399
  def CheckPrereq(self):
8400
    """Check prerequisites.
8401

8402
    This checks that the instance is in the cluster.
8403

8404
    """
8405
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8406
    assert self.instance is not None, \
8407
      "Cannot retrieve locked instance %s" % self.op.instance_name
8408
    _CheckNodeOnline(self, self.instance.primary_node)
8409

    
8410
  def Exec(self, feedback_fn):
8411
    """Connect to the console of an instance
8412

8413
    """
8414
    instance = self.instance
8415
    node = instance.primary_node
8416

    
8417
    node_insts = self.rpc.call_instance_list([node],
8418
                                             [instance.hypervisor])[node]
8419
    node_insts.Raise("Can't get node information from %s" % node)
8420

    
8421
    if instance.name not in node_insts.payload:
8422
      if instance.admin_up:
8423
        state = constants.INSTST_ERRORDOWN
8424
      else:
8425
        state = constants.INSTST_ADMINDOWN
8426
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8427
                               (instance.name, state))
8428

    
8429
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8430

    
8431
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8432

    
8433

    
8434
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
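  # Purely illustrative (assumed shape, not taken from this module): the dict
  # returned by ToDict() below typically describes how to reach the console,
  # e.g. something like
  #   {"instance": "inst1.example.com", "kind": "ssh",
  #    "host": "node1.example.com", "user": "root", "command": [...]}
  # The exact keys depend on objects.InstanceConsole and the hypervisor.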
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
8453

    
8454

    
8455
class LUInstanceReplaceDisks(LogicalUnit):
8456
  """Replace the disks of an instance.
8457

8458
  """
8459
  HPATH = "mirrors-replace"
8460
  HTYPE = constants.HTYPE_INSTANCE
8461
  REQ_BGL = False
8462

    
8463
  def CheckArguments(self):
8464
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8465
                                  self.op.iallocator)
8466

    
8467
  def ExpandNames(self):
8468
    self._ExpandAndLockInstance()
8469

    
8470
    if self.op.iallocator is not None:
8471
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8472

    
8473
    elif self.op.remote_node is not None:
8474
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8475
      self.op.remote_node = remote_node
8476

    
8477
      # Warning: do not remove the locking of the new secondary here
8478
      # unless DRBD8.AddChildren is changed to work in parallel;
8479
      # currently it doesn't since parallel invocations of
8480
      # FindUnusedMinor will conflict
8481
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8482
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8483

    
8484
    else:
8485
      self.needed_locks[locking.LEVEL_NODE] = []
8486
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8487

    
8488
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8489
                                   self.op.iallocator, self.op.remote_node,
8490
                                   self.op.disks, False, self.op.early_release)
8491

    
8492
    self.tasklets = [self.replacer]
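    # With this tasklet registered, prerequisite checks and execution for this
    # LU are carried out by TLReplaceDisks below rather than by the LU itself.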
8493

    
8494
  def DeclareLocks(self, level):
8495
    # If we're not already locking all nodes in the set we have to declare the
8496
    # instance's primary/secondary nodes.
8497
    if (level == locking.LEVEL_NODE and
8498
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8499
      self._LockInstancesNodes()
8500

    
8501
  def BuildHooksEnv(self):
8502
    """Build hooks env.
8503

8504
    This runs on the master, the primary and all the secondaries.
8505

8506
    """
8507
    instance = self.replacer.instance
8508
    env = {
8509
      "MODE": self.op.mode,
8510
      "NEW_SECONDARY": self.op.remote_node,
8511
      "OLD_SECONDARY": instance.secondary_nodes[0],
8512
      }
8513
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8514
    return env
8515

    
8516
  def BuildHooksNodes(self):
8517
    """Build hooks nodes.
8518

8519
    """
8520
    instance = self.replacer.instance
8521
    nl = [
8522
      self.cfg.GetMasterNode(),
8523
      instance.primary_node,
8524
      ]
8525
    if self.op.remote_node is not None:
8526
      nl.append(self.op.remote_node)
8527
    return nl, nl
8528

    
8529

    
8530
class TLReplaceDisks(Tasklet):
8531
  """Replaces disks for an instance.
8532

8533
  Note: Locking is not within the scope of this class.
8534

8535
  """
8536
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8537
               disks, delay_iallocator, early_release):
8538
    """Initializes this class.
8539

8540
    """
8541
    Tasklet.__init__(self, lu)
8542

    
8543
    # Parameters
8544
    self.instance_name = instance_name
8545
    self.mode = mode
8546
    self.iallocator_name = iallocator_name
8547
    self.remote_node = remote_node
8548
    self.disks = disks
8549
    self.delay_iallocator = delay_iallocator
8550
    self.early_release = early_release
8551

    
8552
    # Runtime data
8553
    self.instance = None
8554
    self.new_node = None
8555
    self.target_node = None
8556
    self.other_node = None
8557
    self.remote_node_info = None
8558
    self.node_secondary_ip = None
8559

    
8560
  @staticmethod
8561
  def CheckArguments(mode, remote_node, iallocator):
8562
    """Helper function for users of this class.
8563

8564
    """
8565
    # check for valid parameter combination
8566
    if mode == constants.REPLACE_DISK_CHG:
8567
      if remote_node is None and iallocator is None:
8568
        raise errors.OpPrereqError("When changing the secondary either an"
8569
                                   " iallocator script must be used or the"
8570
                                   " new node given", errors.ECODE_INVAL)
8571

    
8572
      if remote_node is not None and iallocator is not None:
8573
        raise errors.OpPrereqError("Give either the iallocator or the new"
8574
                                   " secondary, not both", errors.ECODE_INVAL)
8575

    
8576
    elif remote_node is not None or iallocator is not None:
8577
      # Not replacing the secondary
8578
      raise errors.OpPrereqError("The iallocator and new node options can"
8579
                                 " only be used when changing the"
8580
                                 " secondary node", errors.ECODE_INVAL)
8581

    
8582
  @staticmethod
8583
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8584
    """Compute a new secondary node using an IAllocator.
8585

8586
    """
8587
    ial = IAllocator(lu.cfg, lu.rpc,
8588
                     mode=constants.IALLOCATOR_MODE_RELOC,
8589
                     name=instance_name,
8590
                     relocate_from=relocate_from)
8591

    
8592
    ial.Run(iallocator_name)
8593

    
8594
    if not ial.success:
8595
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8596
                                 " %s" % (iallocator_name, ial.info),
8597
                                 errors.ECODE_NORES)
8598

    
8599
    if len(ial.result) != ial.required_nodes:
8600
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8601
                                 " of nodes (%s), required %s" %
8602
                                 (iallocator_name,
8603
                                  len(ial.result), ial.required_nodes),
8604
                                 errors.ECODE_FAULT)
8605

    
8606
    remote_node_name = ial.result[0]
8607

    
8608
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8609
               instance_name, remote_node_name)
8610

    
8611
    return remote_node_name
8612

    
8613
  def _FindFaultyDisks(self, node_name):
8614
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8615
                                    node_name, True)
8616

    
8617
  def _CheckDisksActivated(self, instance):
8618
    """Checks if the instance disks are activated.
8619

8620
    @param instance: The instance to check disks
8621
    @return: True if they are activated, False otherwise
8622

8623
    """
8624
    nodes = instance.all_nodes
8625

    
8626
    for idx, dev in enumerate(instance.disks):
8627
      for node in nodes:
8628
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8629
        self.cfg.SetDiskID(dev, node)
8630

    
8631
        result = self.rpc.call_blockdev_find(node, dev)
8632

    
8633
        if result.offline:
8634
          continue
8635
        elif result.fail_msg or not result.payload:
8636
          return False
8637

    
8638
    return True
8639

    
8640

    
8641
  def CheckPrereq(self):
8642
    """Check prerequisites.
8643

8644
    This checks that the instance is in the cluster.
8645

8646
    """
8647
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8648
    assert instance is not None, \
8649
      "Cannot retrieve locked instance %s" % self.instance_name
8650

    
8651
    if instance.disk_template != constants.DT_DRBD8:
8652
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8653
                                 " instances", errors.ECODE_INVAL)
8654

    
8655
    if len(instance.secondary_nodes) != 1:
8656
      raise errors.OpPrereqError("The instance has a strange layout,"
8657
                                 " expected one secondary but found %d" %
8658
                                 len(instance.secondary_nodes),
8659
                                 errors.ECODE_FAULT)
8660

    
8661
    if not self.delay_iallocator:
8662
      self._CheckPrereq2()
8663

    
8664
  def _CheckPrereq2(self):
8665
    """Check prerequisites, second part.
8666

8667
    This function should conceptually be part of CheckPrereq, but is run from
    Exec instead when delay_iallocator is set, because during node evacuation
    the iallocator would otherwise only be called with an unmodified cluster
    model, not taking planned changes into account.
8671

8672
    """
8673
    instance = self.instance
8674
    secondary_node = instance.secondary_nodes[0]
8675

    
8676
    if self.iallocator_name is None:
8677
      remote_node = self.remote_node
8678
    else:
8679
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8680
                                       instance.name, instance.secondary_nodes)
8681

    
8682
    if remote_node is not None:
8683
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8684
      assert self.remote_node_info is not None, \
8685
        "Cannot retrieve locked node %s" % remote_node
8686
    else:
8687
      self.remote_node_info = None
8688

    
8689
    if remote_node == self.instance.primary_node:
8690
      raise errors.OpPrereqError("The specified node is the primary node of"
8691
                                 " the instance.", errors.ECODE_INVAL)
8692

    
8693
    if remote_node == secondary_node:
8694
      raise errors.OpPrereqError("The specified node is already the"
8695
                                 " secondary node of the instance.",
8696
                                 errors.ECODE_INVAL)
8697

    
8698
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8699
                                    constants.REPLACE_DISK_CHG):
8700
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8701
                                 errors.ECODE_INVAL)
8702

    
8703
    if self.mode == constants.REPLACE_DISK_AUTO:
8704
      if not self._CheckDisksActivated(instance):
8705
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
8706
                                   " first" % self.instance_name,
8707
                                   errors.ECODE_STATE)
8708
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8709
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8710

    
8711
      if faulty_primary and faulty_secondary:
8712
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8713
                                   " one node and can not be repaired"
8714
                                   " automatically" % self.instance_name,
8715
                                   errors.ECODE_STATE)
8716

    
8717
      if faulty_primary:
8718
        self.disks = faulty_primary
8719
        self.target_node = instance.primary_node
8720
        self.other_node = secondary_node
8721
        check_nodes = [self.target_node, self.other_node]
8722
      elif faulty_secondary:
8723
        self.disks = faulty_secondary
8724
        self.target_node = secondary_node
8725
        self.other_node = instance.primary_node
8726
        check_nodes = [self.target_node, self.other_node]
8727
      else:
8728
        self.disks = []
8729
        check_nodes = []
8730

    
8731
    else:
8732
      # Non-automatic modes
8733
      if self.mode == constants.REPLACE_DISK_PRI:
8734
        self.target_node = instance.primary_node
8735
        self.other_node = secondary_node
8736
        check_nodes = [self.target_node, self.other_node]
8737

    
8738
      elif self.mode == constants.REPLACE_DISK_SEC:
8739
        self.target_node = secondary_node
8740
        self.other_node = instance.primary_node
8741
        check_nodes = [self.target_node, self.other_node]
8742

    
8743
      elif self.mode == constants.REPLACE_DISK_CHG:
8744
        self.new_node = remote_node
8745
        self.other_node = instance.primary_node
8746
        self.target_node = secondary_node
8747
        check_nodes = [self.new_node, self.other_node]
8748

    
8749
        _CheckNodeNotDrained(self.lu, remote_node)
8750
        _CheckNodeVmCapable(self.lu, remote_node)
8751

    
8752
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8753
        assert old_node_info is not None
8754
        if old_node_info.offline and not self.early_release:
8755
          # doesn't make sense to delay the release
8756
          self.early_release = True
8757
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8758
                          " early-release mode", secondary_node)
8759

    
8760
      else:
8761
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8762
                                     self.mode)
8763

    
8764
      # If not specified all disks should be replaced
8765
      if not self.disks:
8766
        self.disks = range(len(self.instance.disks))
8767

    
8768
    for node in check_nodes:
8769
      _CheckNodeOnline(self.lu, node)
8770

    
8771
    # Check whether disks are valid
8772
    for disk_idx in self.disks:
8773
      instance.FindDisk(disk_idx)
8774

    
8775
    # Get secondary node IP addresses
8776
    node_2nd_ip = {}
8777

    
8778
    for node_name in [self.target_node, self.other_node, self.new_node]:
8779
      if node_name is not None:
8780
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8781

    
8782
    self.node_secondary_ip = node_2nd_ip
8783

    
8784
  def Exec(self, feedback_fn):
8785
    """Execute disk replacement.
8786

8787
    This dispatches the disk replacement to the appropriate handler.
8788

8789
    """
8790
    if self.delay_iallocator:
8791
      self._CheckPrereq2()
8792

    
8793
    if not self.disks:
8794
      feedback_fn("No disks need replacement")
8795
      return
8796

    
8797
    feedback_fn("Replacing disk(s) %s for %s" %
8798
                (utils.CommaJoin(self.disks), self.instance.name))
8799

    
8800
    activate_disks = (not self.instance.admin_up)
8801

    
8802
    # Activate the instance disks if we're replacing them on a down instance
8803
    if activate_disks:
8804
      _StartInstanceDisks(self.lu, self.instance, True)
8805

    
8806
    try:
8807
      # Should we replace the secondary node?
8808
      if self.new_node is not None:
8809
        fn = self._ExecDrbd8Secondary
8810
      else:
8811
        fn = self._ExecDrbd8DiskOnly
8812

    
8813
      return fn(feedback_fn)
8814

    
8815
    finally:
8816
      # Deactivate the instance disks if we're replacing them on a
8817
      # down instance
8818
      if activate_disks:
8819
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8820

    
8821
  def _CheckVolumeGroup(self, nodes):
8822
    self.lu.LogInfo("Checking volume groups")
8823

    
8824
    vgname = self.cfg.GetVGName()
8825

    
8826
    # Make sure volume group exists on all involved nodes
8827
    results = self.rpc.call_vg_list(nodes)
8828
    if not results:
8829
      raise errors.OpExecError("Can't list volume groups on the nodes")
8830

    
8831
    for node in nodes:
8832
      res = results[node]
8833
      res.Raise("Error checking node %s" % node)
8834
      if vgname not in res.payload:
8835
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8836
                                 (vgname, node))
8837

    
8838
  def _CheckDisksExistence(self, nodes):
8839
    # Check disk existence
8840
    for idx, dev in enumerate(self.instance.disks):
8841
      if idx not in self.disks:
8842
        continue
8843

    
8844
      for node in nodes:
8845
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8846
        self.cfg.SetDiskID(dev, node)
8847

    
8848
        result = self.rpc.call_blockdev_find(node, dev)
8849

    
8850
        msg = result.fail_msg
8851
        if msg or not result.payload:
8852
          if not msg:
8853
            msg = "disk not found"
8854
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8855
                                   (idx, node, msg))
8856

    
8857
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8858
    for idx, dev in enumerate(self.instance.disks):
8859
      if idx not in self.disks:
8860
        continue
8861

    
8862
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8863
                      (idx, node_name))
8864

    
8865
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8866
                                   ldisk=ldisk):
8867
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8868
                                 " replace disks for instance %s" %
8869
                                 (node_name, self.instance.name))
8870

    
8871
  def _CreateNewStorage(self, node_name):
8872
    vgname = self.cfg.GetVGName()
8873
    iv_names = {}
8874

    
8875
    for idx, dev in enumerate(self.instance.disks):
8876
      if idx not in self.disks:
8877
        continue
8878

    
8879
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8880

    
8881
      self.cfg.SetDiskID(dev, node_name)
8882

    
8883
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8884
      names = _GenerateUniqueNames(self.lu, lv_names)
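      # Illustrative example (assumed): for disk/0 this yields names ending in
      # ".disk0_data" and ".disk0_meta", which _GenerateUniqueNames is expected
      # to prefix with a unique ID so they cannot clash with the LVs that are
      # being replaced.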
8885

    
8886
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8887
                             logical_id=(vgname, names[0]))
8888
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8889
                             logical_id=(vgname, names[1]))
8890

    
8891
      new_lvs = [lv_data, lv_meta]
8892
      old_lvs = dev.children
8893
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8894

    
8895
      # we pass force_create=True to force the LVM creation
8896
      for new_lv in new_lvs:
8897
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8898
                        _GetInstanceInfoText(self.instance), False)
8899

    
8900
    return iv_names
8901

    
8902
  def _CheckDevices(self, node_name, iv_names):
8903
    for name, (dev, _, _) in iv_names.iteritems():
8904
      self.cfg.SetDiskID(dev, node_name)
8905

    
8906
      result = self.rpc.call_blockdev_find(node_name, dev)
8907

    
8908
      msg = result.fail_msg
8909
      if msg or not result.payload:
8910
        if not msg:
8911
          msg = "disk not found"
8912
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8913
                                 (name, msg))
8914

    
8915
      if result.payload.is_degraded:
8916
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8917

    
8918
  def _RemoveOldStorage(self, node_name, iv_names):
8919
    for name, (_, old_lvs, _) in iv_names.iteritems():
8920
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8921

    
8922
      for lv in old_lvs:
8923
        self.cfg.SetDiskID(lv, node_name)
8924

    
8925
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8926
        if msg:
8927
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8928
                             hint="remove unused LVs manually")
8929

    
8930
  def _ReleaseNodeLock(self, node_name):
8931
    """Releases the lock for a given node."""
8932
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
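    # Note: callers in this class pass either a single node name or a list of
    # names here (see the early-release paths), and glm.release is assumed to
    # accept both forms.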
8933

    
8934
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8935
    """Replace a disk on the primary or secondary for DRBD 8.
8936

8937
    The algorithm for replace is quite complicated:
8938

8939
      1. for each disk to be replaced:
8940

8941
        1. create new LVs on the target node with unique names
8942
        1. detach old LVs from the drbd device
8943
        1. rename old LVs to name_replaced.<time_t>
8944
        1. rename new LVs to old LVs
8945
        1. attach the new LVs (with the old names now) to the drbd device
8946

8947
      1. wait for sync across all devices
8948

8949
      1. for each modified disk:
8950

8951
        1. remove old LVs (which have the name name_replaced.<time_t>)
8952

8953
    Failures are not very well handled.
8954

8955
    """
8956
    steps_total = 6
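    # Rough sketch of one disk's flow, with made-up names: new LVs
    # "uuid.disk0_data"/"uuid.disk0_meta" are created, the old LVs are renamed
    # to "<old-name>_replaced-<time_t>", the new LVs take over the old names
    # and are re-attached to the DRBD device, and the renamed old LVs are
    # removed once the mirror has synced.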
8957

    
8958
    # Step: check device activation
8959
    self.lu.LogStep(1, steps_total, "Check device existence")
8960
    self._CheckDisksExistence([self.other_node, self.target_node])
8961
    self._CheckVolumeGroup([self.target_node, self.other_node])
8962

    
8963
    # Step: check other node consistency
8964
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8965
    self._CheckDisksConsistency(self.other_node,
8966
                                self.other_node == self.instance.primary_node,
8967
                                False)
8968

    
8969
    # Step: create new storage
8970
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8971
    iv_names = self._CreateNewStorage(self.target_node)
8972

    
8973
    # Step: for each lv, detach+rename*2+attach
8974
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8975
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8976
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8977

    
8978
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8979
                                                     old_lvs)
8980
      result.Raise("Can't detach drbd from local storage on node"
8981
                   " %s for device %s" % (self.target_node, dev.iv_name))
8982
      #dev.children = []
8983
      #cfg.Update(instance)
8984

    
8985
      # ok, we created the new LVs, so now we know we have the needed
8986
      # storage; as such, we proceed on the target node to rename
8987
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8988
      # using the assumption that logical_id == physical_id (which in
8989
      # turn is the unique_id on that node)
8990

    
8991
      # FIXME(iustin): use a better name for the replaced LVs
8992
      temp_suffix = int(time.time())
8993
      ren_fn = lambda d, suff: (d.physical_id[0],
8994
                                d.physical_id[1] + "_replaced-%s" % suff)
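      # e.g. (hypothetical values) ("xenvg", "uuid.disk0_data") becomes
      # ("xenvg", "uuid.disk0_data_replaced-1366042800"); physical_id[0] is the
      # volume group and physical_id[1] the LV name.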
8995

    
8996
      # Build the rename list based on what LVs exist on the node
8997
      rename_old_to_new = []
8998
      for to_ren in old_lvs:
8999
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9000
        if not result.fail_msg and result.payload:
9001
          # device exists
9002
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9003

    
9004
      self.lu.LogInfo("Renaming the old LVs on the target node")
9005
      result = self.rpc.call_blockdev_rename(self.target_node,
9006
                                             rename_old_to_new)
9007
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9008

    
9009
      # Now we rename the new LVs to the old LVs
9010
      self.lu.LogInfo("Renaming the new LVs on the target node")
9011
      rename_new_to_old = [(new, old.physical_id)
9012
                           for old, new in zip(old_lvs, new_lvs)]
9013
      result = self.rpc.call_blockdev_rename(self.target_node,
9014
                                             rename_new_to_old)
9015
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9016

    
9017
      for old, new in zip(old_lvs, new_lvs):
9018
        new.logical_id = old.logical_id
9019
        self.cfg.SetDiskID(new, self.target_node)
9020

    
9021
      for disk in old_lvs:
9022
        disk.logical_id = ren_fn(disk, temp_suffix)
9023
        self.cfg.SetDiskID(disk, self.target_node)
9024

    
9025
      # Now that the new lvs have the old name, we can add them to the device
9026
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9027
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9028
                                                  new_lvs)
9029
      msg = result.fail_msg
9030
      if msg:
9031
        for new_lv in new_lvs:
9032
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9033
                                               new_lv).fail_msg
9034
          if msg2:
9035
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
9038
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9039

    
9040
      dev.children = new_lvs
9041

    
9042
      self.cfg.Update(self.instance, feedback_fn)
9043

    
9044
    cstep = 5
9045
    if self.early_release:
9046
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9047
      cstep += 1
9048
      self._RemoveOldStorage(self.target_node, iv_names)
9049
      # WARNING: we release both node locks here, do not do other RPCs
9050
      # than WaitForSync to the primary node
9051
      self._ReleaseNodeLock([self.target_node, self.other_node])
9052

    
9053
    # Wait for sync
9054
    # This can fail as the old devices are degraded and _WaitForSync
9055
    # does a combined result over all disks, so we don't check its return value
9056
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9057
    cstep += 1
9058
    _WaitForSync(self.lu, self.instance)
9059

    
9060
    # Check all devices manually
9061
    self._CheckDevices(self.instance.primary_node, iv_names)
9062

    
9063
    # Step: remove old storage
9064
    if not self.early_release:
9065
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9066
      cstep += 1
9067
      self._RemoveOldStorage(self.target_node, iv_names)
9068

    
9069
  def _ExecDrbd8Secondary(self, feedback_fn):
9070
    """Replace the secondary node for DRBD 8.
9071

9072
    The algorithm for replace is quite complicated:
9073
      - for all disks of the instance:
9074
        - create new LVs on the new node with same names
9075
        - shutdown the drbd device on the old secondary
9076
        - disconnect the drbd network on the primary
9077
        - create the drbd device on the new secondary
9078
        - network attach the drbd on the primary, using an artifice:
9079
          the drbd code for Attach() will connect to the network if it
9080
          finds a device which is connected to the good local disks but
9081
          not network enabled
9082
      - wait for sync across all devices
9083
      - remove all disks from the old secondary
9084

9085
    Failures are not very well handled.
9086

9087
    """
9088
    steps_total = 6
9089

    
9090
    # Step: check device activation
9091
    self.lu.LogStep(1, steps_total, "Check device existence")
9092
    self._CheckDisksExistence([self.instance.primary_node])
9093
    self._CheckVolumeGroup([self.instance.primary_node])
9094

    
9095
    # Step: check other node consistency
9096
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9097
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9098

    
9099
    # Step: create new storage
9100
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9101
    for idx, dev in enumerate(self.instance.disks):
9102
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9103
                      (self.new_node, idx))
9104
      # we pass force_create=True to force LVM creation
9105
      for new_lv in dev.children:
9106
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9107
                        _GetInstanceInfoText(self.instance), False)
9108

    
9109
    # Step 4: drbd minors and drbd setup changes
9110
    # after this, we must manually remove the drbd minors on both the
9111
    # error and the success paths
9112
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9113
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9114
                                         for dev in self.instance.disks],
9115
                                        self.instance.name)
9116
    logging.debug("Allocated minors %r", minors)
9117

    
9118
    iv_names = {}
9119
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9120
      self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
9121
                      (self.new_node, idx))
9122
      # create new devices on new_node; note that we create two IDs:
9123
      # one without port, so the drbd will be activated without
9124
      # networking information on the new node at this stage, and one
9125
      # with network, for the latter activation in step 4
9126
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
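      # The DRBD8 logical_id unpacked above is (nodeA, nodeB, port, minorA,
      # minorB, shared_secret), e.g. (hypothetical values)
      # ("node1.example.com", "node2.example.com", 11000, 0, 1, "secret").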
9127
      if self.instance.primary_node == o_node1:
9128
        p_minor = o_minor1
9129
      else:
9130
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9131
        p_minor = o_minor2
9132

    
9133
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9134
                      p_minor, new_minor, o_secret)
9135
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9136
                    p_minor, new_minor, o_secret)
9137

    
9138
      iv_names[idx] = (dev, dev.children, new_net_id)
9139
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9140
                    new_net_id)
9141
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9142
                              logical_id=new_alone_id,
9143
                              children=dev.children,
9144
                              size=dev.size)
9145
      try:
9146
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9147
                              _GetInstanceInfoText(self.instance), False)
9148
      except errors.GenericError:
9149
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9150
        raise
9151

    
9152
    # We have new devices, shutdown the drbd on the old secondary
9153
    for idx, dev in enumerate(self.instance.disks):
9154
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9155
      self.cfg.SetDiskID(dev, self.target_node)
9156
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9157
      if msg:
9158
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9159
                           "node: %s" % (idx, msg),
9160
                           hint=("Please cleanup this device manually as"
9161
                                 " soon as possible"))
9162

    
9163
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9164
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9165
                                               self.node_secondary_ip,
9166
                                               self.instance.disks)\
9167
                                              [self.instance.primary_node]
9168

    
9169
    msg = result.fail_msg
9170
    if msg:
9171
      # detaches didn't succeed (unlikely)
9172
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9173
      raise errors.OpExecError("Can't detach the disks from the network on"
9174
                               " old node: %s" % (msg,))
9175

    
9176
    # if we managed to detach at least one, we update all the disks of
9177
    # the instance to point to the new secondary
9178
    self.lu.LogInfo("Updating instance configuration")
9179
    for dev, _, new_logical_id in iv_names.itervalues():
9180
      dev.logical_id = new_logical_id
9181
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9182

    
9183
    self.cfg.Update(self.instance, feedback_fn)
9184

    
9185
    # and now perform the drbd attach
9186
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9187
                    " (standalone => connected)")
9188
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9189
                                            self.new_node],
9190
                                           self.node_secondary_ip,
9191
                                           self.instance.disks,
9192
                                           self.instance.name,
9193
                                           False)
9194
    for to_node, to_result in result.items():
9195
      msg = to_result.fail_msg
9196
      if msg:
9197
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9198
                           to_node, msg,
9199
                           hint=("please do a gnt-instance info to see the"
9200
                                 " status of disks"))
9201
    cstep = 5
9202
    if self.early_release:
9203
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9204
      cstep += 1
9205
      self._RemoveOldStorage(self.target_node, iv_names)
9206
      # WARNING: we release all node locks here, do not do other RPCs
9207
      # than WaitForSync to the primary node
9208
      self._ReleaseNodeLock([self.instance.primary_node,
9209
                             self.target_node,
9210
                             self.new_node])
9211

    
9212
    # Wait for sync
9213
    # This can fail as the old devices are degraded and _WaitForSync
9214
    # does a combined result over all disks, so we don't check its return value
9215
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9216
    cstep += 1
9217
    _WaitForSync(self.lu, self.instance)
9218

    
9219
    # Check all devices manually
9220
    self._CheckDevices(self.instance.primary_node, iv_names)
9221

    
9222
    # Step: remove old storage
9223
    if not self.early_release:
9224
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9225
      self._RemoveOldStorage(self.target_node, iv_names)
9226

    
9227

    
9228
class LURepairNodeStorage(NoHooksLU):
9229
  """Repairs the volume group on a node.
9230

9231
  """
9232
  REQ_BGL = False
9233

    
9234
  def CheckArguments(self):
9235
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9236

    
9237
    storage_type = self.op.storage_type
9238

    
9239
    if (constants.SO_FIX_CONSISTENCY not in
9240
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9241
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9242
                                 " repaired" % storage_type,
9243
                                 errors.ECODE_INVAL)
9244

    
9245
  def ExpandNames(self):
9246
    self.needed_locks = {
9247
      locking.LEVEL_NODE: [self.op.node_name],
9248
      }
9249

    
9250
  def _CheckFaultyDisks(self, instance, node_name):
9251
    """Ensure faulty disks abort the opcode or at least warn."""
9252
    try:
9253
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9254
                                  node_name, True):
9255
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9256
                                   " node '%s'" % (instance.name, node_name),
9257
                                   errors.ECODE_STATE)
9258
    except errors.OpPrereqError, err:
9259
      if self.op.ignore_consistency:
9260
        self.proc.LogWarning(str(err.args[0]))
9261
      else:
9262
        raise
9263

    
9264
  def CheckPrereq(self):
9265
    """Check prerequisites.
9266

9267
    """
9268
    # Check whether any instance on this node has faulty disks
9269
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9270
      if not inst.admin_up:
9271
        continue
9272
      check_nodes = set(inst.all_nodes)
9273
      check_nodes.discard(self.op.node_name)
9274
      for inst_node_name in check_nodes:
9275
        self._CheckFaultyDisks(inst, inst_node_name)
9276

    
9277
  def Exec(self, feedback_fn):
9278
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9279
                (self.op.name, self.op.node_name))
9280

    
9281
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9282
    result = self.rpc.call_storage_execute(self.op.node_name,
9283
                                           self.op.storage_type, st_args,
9284
                                           self.op.name,
9285
                                           constants.SO_FIX_CONSISTENCY)
9286
    result.Raise("Failed to repair storage unit '%s' on %s" %
9287
                 (self.op.name, self.op.node_name))
9288

    
9289

    
9290
class LUNodeEvacStrategy(NoHooksLU):
9291
  """Computes the node evacuation strategy.
9292

9293
  """
9294
  REQ_BGL = False
9295

    
9296
  def CheckArguments(self):
9297
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9298

    
9299
  def ExpandNames(self):
9300
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9301
    self.needed_locks = locks = {}
9302
    if self.op.remote_node is None:
9303
      locks[locking.LEVEL_NODE] = locking.ALL_SET
9304
    else:
9305
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9306
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9307

    
9308
  def Exec(self, feedback_fn):
9309
    if self.op.remote_node is not None:
9310
      instances = []
9311
      for node in self.op.nodes:
9312
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9313
      result = []
9314
      for i in instances:
9315
        if i.primary_node == self.op.remote_node:
9316
          raise errors.OpPrereqError("Node %s is the primary node of"
9317
                                     " instance %s, cannot use it as"
9318
                                     " secondary" %
9319
                                     (self.op.remote_node, i.name),
9320
                                     errors.ECODE_INVAL)
9321
        result.append([i.name, self.op.remote_node])
9322
    else:
9323
      ial = IAllocator(self.cfg, self.rpc,
9324
                       mode=constants.IALLOCATOR_MODE_MEVAC,
9325
                       evac_nodes=self.op.nodes)
9326
      ial.Run(self.op.iallocator, validate=True)
9327
      if not ial.success:
9328
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9329
                                 errors.ECODE_NORES)
9330
      result = ial.result
9331
    return result
9332

    
9333

    
9334
class LUInstanceGrowDisk(LogicalUnit):
9335
  """Grow a disk of an instance.
9336

9337
  """
9338
  HPATH = "disk-grow"
9339
  HTYPE = constants.HTYPE_INSTANCE
9340
  REQ_BGL = False
9341

    
9342
  def ExpandNames(self):
9343
    self._ExpandAndLockInstance()
9344
    self.needed_locks[locking.LEVEL_NODE] = []
9345
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9346

    
9347
  def DeclareLocks(self, level):
9348
    if level == locking.LEVEL_NODE:
9349
      self._LockInstancesNodes()
9350

    
9351
  def BuildHooksEnv(self):
9352
    """Build hooks env.
9353

9354
    This runs on the master, the primary and all the secondaries.
9355

9356
    """
9357
    env = {
9358
      "DISK": self.op.disk,
9359
      "AMOUNT": self.op.amount,
9360
      }
9361
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9362
    return env
9363

    
9364
  def BuildHooksNodes(self):
9365
    """Build hooks nodes.
9366

9367
    """
9368
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9369
    return (nl, nl)
9370

    
9371
  def CheckPrereq(self):
9372
    """Check prerequisites.
9373

9374
    This checks that the instance is in the cluster.
9375

9376
    """
9377
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9378
    assert instance is not None, \
9379
      "Cannot retrieve locked instance %s" % self.op.instance_name
9380
    nodenames = list(instance.all_nodes)
9381
    for node in nodenames:
9382
      _CheckNodeOnline(self, node)
9383

    
9384
    self.instance = instance
9385

    
9386
    if instance.disk_template not in constants.DTS_GROWABLE:
9387
      raise errors.OpPrereqError("Instance's disk layout does not support"
9388
                                 " growing.", errors.ECODE_INVAL)
9389

    
9390
    self.disk = instance.FindDisk(self.op.disk)
9391

    
9392
    if instance.disk_template not in (constants.DT_FILE,
9393
                                      constants.DT_SHARED_FILE):
9394
      # TODO: check the free disk space for file, when that feature will be
9395
      # supported
9396
      _CheckNodesFreeDiskPerVG(self, nodenames,
9397
                               self.disk.ComputeGrowth(self.op.amount))
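      # ComputeGrowth is assumed to return the extra space required per volume
      # group (in the form _CheckNodesFreeDiskPerVG expects); this is inferred
      # from the call shape, not verified here.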
9398

    
9399
  def Exec(self, feedback_fn):
9400
    """Execute disk grow.
9401

9402
    """
9403
    instance = self.instance
9404
    disk = self.disk
9405

    
9406
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9407
    if not disks_ok:
9408
      raise errors.OpExecError("Cannot activate block device to grow")
9409

    
9410
    for node in instance.all_nodes:
9411
      self.cfg.SetDiskID(disk, node)
9412
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9413
      result.Raise("Grow request failed to node %s" % node)
9414

    
9415
      # TODO: Rewrite code to work properly
9416
      # DRBD goes into sync mode for a short amount of time after executing the
9417
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9418
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9419
      # time is a work-around.
9420
      time.sleep(5)
9421

    
9422
    disk.RecordGrow(self.op.amount)
9423
    self.cfg.Update(instance, feedback_fn)
9424
    if self.op.wait_for_sync:
9425
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9426
      if disk_abort:
9427
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9428
                             " status.\nPlease check the instance.")
9429
      if not instance.admin_up:
9430
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9431
    elif not instance.admin_up:
9432
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9433
                           " not supposed to be running because no wait for"
9434
                           " sync mode was requested.")
9435

    
9436

    
9437
class LUInstanceQueryData(NoHooksLU):
9438
  """Query runtime instance data.
9439

9440
  """
9441
  REQ_BGL = False
9442

    
9443
  def ExpandNames(self):
9444
    self.needed_locks = {}
9445
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9446

    
9447
    if self.op.instances:
9448
      self.wanted_names = []
9449
      for name in self.op.instances:
9450
        full_name = _ExpandInstanceName(self.cfg, name)
9451
        self.wanted_names.append(full_name)
9452
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9453
    else:
9454
      self.wanted_names = None
9455
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9456

    
9457
    self.needed_locks[locking.LEVEL_NODE] = []
9458
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9459

    
9460
  def DeclareLocks(self, level):
9461
    if level == locking.LEVEL_NODE:
9462
      self._LockInstancesNodes()
9463

    
9464
  def CheckPrereq(self):
9465
    """Check prerequisites.
9466

9467
    This only checks the optional instance list against the existing names.
9468

9469
    """
9470
    if self.wanted_names is None:
9471
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9472

    
9473
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9474
                             in self.wanted_names]
9475

    
9476
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9477
    """Returns the status of a block device
9478

9479
    """
9480
    if self.op.static or not node:
9481
      return None
9482

    
9483
    self.cfg.SetDiskID(dev, node)
9484

    
9485
    result = self.rpc.call_blockdev_find(node, dev)
9486
    if result.offline:
9487
      return None
9488

    
9489
    result.Raise("Can't compute disk status for %s" % instance_name)
9490

    
9491
    status = result.payload
9492
    if status is None:
9493
      return None
9494

    
9495
    return (status.dev_path, status.major, status.minor,
9496
            status.sync_percent, status.estimated_time,
9497
            status.is_degraded, status.ldisk_status)
9498

    
9499
  def _ComputeDiskStatus(self, instance, snode, dev):
9500
    """Compute block device status.
9501

9502
    """
9503
    if dev.dev_type in constants.LDS_DRBD:
9504
      # we change the snode then (otherwise we use the one passed in)
9505
      if dev.logical_id[0] == instance.primary_node:
9506
        snode = dev.logical_id[1]
9507
      else:
9508
        snode = dev.logical_id[0]
9509

    
9510
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9511
                                              instance.name, dev)
9512
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9513

    
9514
    if dev.children:
9515
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9516
                      for child in dev.children]
9517
    else:
9518
      dev_children = []
9519

    
9520
    data = {
9521
      "iv_name": dev.iv_name,
9522
      "dev_type": dev.dev_type,
9523
      "logical_id": dev.logical_id,
9524
      "physical_id": dev.physical_id,
9525
      "pstatus": dev_pstatus,
9526
      "sstatus": dev_sstatus,
9527
      "children": dev_children,
9528
      "mode": dev.mode,
9529
      "size": dev.size,
9530
      }
9531

    
9532
    return data
9533

    
9534
  def Exec(self, feedback_fn):
9535
    """Gather and return data"""
9536
    result = {}
9537

    
9538
    cluster = self.cfg.GetClusterInfo()
9539

    
9540
    for instance in self.wanted_instances:
9541
      if not self.op.static:
9542
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9543
                                                  instance.name,
9544
                                                  instance.hypervisor)
9545
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9546
        remote_info = remote_info.payload
9547
        if remote_info and "state" in remote_info:
9548
          remote_state = "up"
9549
        else:
9550
          remote_state = "down"
9551
      else:
9552
        remote_state = None
9553
      if instance.admin_up:
9554
        config_state = "up"
9555
      else:
9556
        config_state = "down"
9557

    
9558
      disks = [self._ComputeDiskStatus(instance, None, device)
9559
               for device in instance.disks]
9560

    
9561
      idict = {
9562
        "name": instance.name,
9563
        "config_state": config_state,
9564
        "run_state": remote_state,
9565
        "pnode": instance.primary_node,
9566
        "snodes": instance.secondary_nodes,
9567
        "os": instance.os,
9568
        # this happens to be the same format used for hooks
9569
        "nics": _NICListToTuple(self, instance.nics),
9570
        "disk_template": instance.disk_template,
9571
        "disks": disks,
9572
        "hypervisor": instance.hypervisor,
9573
        "network_port": instance.network_port,
9574
        "hv_instance": instance.hvparams,
9575
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9576
        "be_instance": instance.beparams,
9577
        "be_actual": cluster.FillBE(instance),
9578
        "os_instance": instance.osparams,
9579
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9580
        "serial_no": instance.serial_no,
9581
        "mtime": instance.mtime,
9582
        "ctime": instance.ctime,
9583
        "uuid": instance.uuid,
9584
        }
9585

    
9586
      result[instance.name] = idict
9587

    
9588
    return result
9589

    
9590

    
9591
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
9593

9594
  """
9595
  HPATH = "instance-modify"
9596
  HTYPE = constants.HTYPE_INSTANCE
9597
  REQ_BGL = False
9598

    
9599
  def CheckArguments(self):
9600
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9601
            self.op.hvparams or self.op.beparams or self.op.os_name):
9602
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9603

    
9604
    if self.op.hvparams:
9605
      _CheckGlobalHvParams(self.op.hvparams)
9606

    
9607
    # Disk validation
9608
    disk_addremove = 0
9609
    for disk_op, disk_dict in self.op.disks:
9610
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9611
      if disk_op == constants.DDM_REMOVE:
9612
        disk_addremove += 1
9613
        continue
9614
      elif disk_op == constants.DDM_ADD:
9615
        disk_addremove += 1
9616
      else:
9617
        if not isinstance(disk_op, int):
9618
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9619
        if not isinstance(disk_dict, dict):
9620
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9621
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9622

    
9623
      if disk_op == constants.DDM_ADD:
9624
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9625
        if mode not in constants.DISK_ACCESS_SET:
9626
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9627
                                     errors.ECODE_INVAL)
9628
        size = disk_dict.get(constants.IDISK_SIZE, None)
9629
        if size is None:
9630
          raise errors.OpPrereqError("Required disk parameter size missing",
9631
                                     errors.ECODE_INVAL)
9632
        try:
9633
          size = int(size)
9634
        except (TypeError, ValueError), err:
9635
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9636
                                     str(err), errors.ECODE_INVAL)
9637
        disk_dict[constants.IDISK_SIZE] = size
9638
      else:
9639
        # modification of disk
9640
        if constants.IDISK_SIZE in disk_dict:
9641
          raise errors.OpPrereqError("Disk size change not possible, use"
9642
                                     " grow-disk", errors.ECODE_INVAL)
9643

    
9644
    if disk_addremove > 1:
9645
      raise errors.OpPrereqError("Only one disk add or remove operation"
9646
                                 " supported at a time", errors.ECODE_INVAL)
9647

    
9648
    if self.op.disks and self.op.disk_template is not None:
9649
      raise errors.OpPrereqError("Disk template conversion and other disk"
9650
                                 " changes not supported at the same time",
9651
                                 errors.ECODE_INVAL)
9652

    
9653
    if (self.op.disk_template and
9654
        self.op.disk_template in constants.DTS_INT_MIRROR and
9655
        self.op.remote_node is None):
9656
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9657
                                 " one requires specifying a secondary node",
9658
                                 errors.ECODE_INVAL)
9659

    
9660
    # NIC validation
9661
    nic_addremove = 0
9662
    for nic_op, nic_dict in self.op.nics:
9663
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9664
      if nic_op == constants.DDM_REMOVE:
9665
        nic_addremove += 1
9666
        continue
9667
      elif nic_op == constants.DDM_ADD:
9668
        nic_addremove += 1
9669
      else:
9670
        if not isinstance(nic_op, int):
9671
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9672
        if not isinstance(nic_dict, dict):
9673
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9674
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9675

    
9676
      # nic_dict should be a dict
9677
      nic_ip = nic_dict.get(constants.INIC_IP, None)
9678
      if nic_ip is not None:
9679
        if nic_ip.lower() == constants.VALUE_NONE:
9680
          nic_dict[constants.INIC_IP] = None
9681
        else:
9682
          if not netutils.IPAddress.IsValid(nic_ip):
9683
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9684
                                       errors.ECODE_INVAL)
9685

    
9686
      nic_bridge = nic_dict.get('bridge', None)
9687
      nic_link = nic_dict.get(constants.INIC_LINK, None)
9688
      if nic_bridge and nic_link:
9689
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9690
                                   " at the same time", errors.ECODE_INVAL)
9691
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9692
        nic_dict['bridge'] = None
9693
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9694
        nic_dict[constants.INIC_LINK] = None
9695

    
9696
      if nic_op == constants.DDM_ADD:
9697
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
9698
        if nic_mac is None:
9699
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9700

    
9701
      if constants.INIC_MAC in nic_dict:
9702
        nic_mac = nic_dict[constants.INIC_MAC]
9703
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9704
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9705

    
9706
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9707
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9708
                                     " modifying an existing nic",
9709
                                     errors.ECODE_INVAL)
9710

    
9711
    if nic_addremove > 1:
9712
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9713
                                 " supported at a time", errors.ECODE_INVAL)
9714

    
9715
  def ExpandNames(self):
9716
    self._ExpandAndLockInstance()
9717
    self.needed_locks[locking.LEVEL_NODE] = []
9718
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9719

    
9720
  def DeclareLocks(self, level):
9721
    if level == locking.LEVEL_NODE:
9722
      self._LockInstancesNodes()
9723
      if self.op.disk_template and self.op.remote_node:
9724
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9725
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9726

    
9727
  def BuildHooksEnv(self):
9728
    """Build hooks env.
9729

9730
    This runs on the master, primary and secondaries.
9731

9732
    """
9733
    args = dict()
9734
    if constants.BE_MEMORY in self.be_new:
9735
      args['memory'] = self.be_new[constants.BE_MEMORY]
9736
    if constants.BE_VCPUS in self.be_new:
9737
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9738
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9739
    # information at all.
9740
    if self.op.nics:
9741
      args['nics'] = []
9742
      nic_override = dict(self.op.nics)
9743
      for idx, nic in enumerate(self.instance.nics):
9744
        if idx in nic_override:
9745
          this_nic_override = nic_override[idx]
9746
        else:
9747
          this_nic_override = {}
9748
        if constants.INIC_IP in this_nic_override:
9749
          ip = this_nic_override[constants.INIC_IP]
9750
        else:
9751
          ip = nic.ip
9752
        if constants.INIC_MAC in this_nic_override:
9753
          mac = this_nic_override[constants.INIC_MAC]
9754
        else:
9755
          mac = nic.mac
9756
        if idx in self.nic_pnew:
9757
          nicparams = self.nic_pnew[idx]
9758
        else:
9759
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9760
        mode = nicparams[constants.NIC_MODE]
9761
        link = nicparams[constants.NIC_LINK]
9762
        args['nics'].append((ip, mac, mode, link))
9763
      if constants.DDM_ADD in nic_override:
9764
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9765
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9766
        nicparams = self.nic_pnew[constants.DDM_ADD]
9767
        mode = nicparams[constants.NIC_MODE]
9768
        link = nicparams[constants.NIC_LINK]
9769
        args['nics'].append((ip, mac, mode, link))
9770
      elif constants.DDM_REMOVE in nic_override:
9771
        del args['nics'][-1]
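
    # Each entry appended to args['nics'] above is an (ip, mac, mode, link)
    # tuple; a single bridged NIC might therefore contribute something like
    # ("198.51.100.10", "aa:00:00:35:d0:7d", "bridged", "xen-br0")
    # (the values shown here are purely illustrative).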
9772

    
9773
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9774
    if self.op.disk_template:
9775
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9776

    
9777
    return env
9778

    
9779
  def BuildHooksNodes(self):
9780
    """Build hooks nodes.
9781

9782
    """
9783
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9784
    return (nl, nl)
9785

    
9786
  def CheckPrereq(self):
9787
    """Check prerequisites.
9788

9789
    This only checks the instance list against the existing names.
9790

9791
    """
9792
    # checking the new params on the primary/secondary nodes
9793

    
9794
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9795
    cluster = self.cluster = self.cfg.GetClusterInfo()
9796
    assert self.instance is not None, \
9797
      "Cannot retrieve locked instance %s" % self.op.instance_name
9798
    pnode = instance.primary_node
9799
    nodelist = list(instance.all_nodes)
9800

    
9801
    # OS change
9802
    if self.op.os_name and not self.op.force:
9803
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9804
                      self.op.force_variant)
9805
      instance_os = self.op.os_name
9806
    else:
9807
      instance_os = instance.os
9808

    
9809
    if self.op.disk_template:
9810
      if instance.disk_template == self.op.disk_template:
9811
        raise errors.OpPrereqError("Instance already has disk template %s" %
9812
                                   instance.disk_template, errors.ECODE_INVAL)
9813

    
9814
      if (instance.disk_template,
9815
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9816
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9817
                                   " %s to %s" % (instance.disk_template,
9818
                                                  self.op.disk_template),
9819
                                   errors.ECODE_INVAL)
9820
      _CheckInstanceDown(self, instance, "cannot change disk template")
9821
      if self.op.disk_template in constants.DTS_INT_MIRROR:
9822
        if self.op.remote_node == pnode:
9823
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9824
                                     " as the primary node of the instance" %
9825
                                     self.op.remote_node, errors.ECODE_STATE)
9826
        _CheckNodeOnline(self, self.op.remote_node)
9827
        _CheckNodeNotDrained(self, self.op.remote_node)
9828
        # FIXME: here we assume that the old instance type is DT_PLAIN
9829
        assert instance.disk_template == constants.DT_PLAIN
9830
        disks = [{constants.IDISK_SIZE: d.size,
9831
                  constants.IDISK_VG: d.logical_id[0]}
9832
                 for d in instance.disks]
9833
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9834
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9835

    
9836
    # hvparams processing
9837
    if self.op.hvparams:
9838
      hv_type = instance.hypervisor
9839
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9840
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9841
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9842

    
9843
      # local check
9844
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9845
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9846
      self.hv_new = hv_new # the new actual values
9847
      self.hv_inst = i_hvdict # the new dict (without defaults)
9848
    else:
9849
      self.hv_new = self.hv_inst = {}
9850

    
9851
    # beparams processing
9852
    if self.op.beparams:
9853
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9854
                                   use_none=True)
9855
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9856
      be_new = cluster.SimpleFillBE(i_bedict)
9857
      self.be_new = be_new # the new actual values
9858
      self.be_inst = i_bedict # the new dict (without defaults)
9859
    else:
9860
      self.be_new = self.be_inst = {}
9861

    
9862
    # osparams processing
9863
    if self.op.osparams:
9864
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9865
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9866
      self.os_inst = i_osdict # the new dict (without defaults)
9867
    else:
9868
      self.os_inst = {}
9869

    
9870
    self.warn = []
9871

    
9872
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9873
      mem_check_list = [pnode]
9874
      if be_new[constants.BE_AUTO_BALANCE]:
9875
        # either we changed auto_balance to yes or it was from before
9876
        mem_check_list.extend(instance.secondary_nodes)
9877
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9878
                                                  instance.hypervisor)
9879
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9880
                                         instance.hypervisor)
9881
      pninfo = nodeinfo[pnode]
9882
      msg = pninfo.fail_msg
9883
      if msg:
9884
        # Assume the primary node is unreachable and go ahead
9885
        self.warn.append("Can't get info from primary node %s: %s" %
9886
                         (pnode,  msg))
9887
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9888
        self.warn.append("Node data from primary node %s doesn't contain"
9889
                         " free memory information" % pnode)
9890
      elif instance_info.fail_msg:
9891
        self.warn.append("Can't get instance runtime information: %s" %
9892
                        instance_info.fail_msg)
9893
      else:
9894
        if instance_info.payload:
9895
          current_mem = int(instance_info.payload['memory'])
9896
        else:
9897
          # Assume instance not running
9898
          # (there is a slight race condition here, but it's not very probable,
9899
          # and we have no other way to check)
9900
          current_mem = 0
9901
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9902
                    pninfo.payload['memory_free'])
9903
        if miss_mem > 0:
9904
          raise errors.OpPrereqError("This change will prevent the instance"
9905
                                     " from starting, due to %d MB of memory"
9906
                                     " missing on its primary node" % miss_mem,
9907
                                     errors.ECODE_NORES)
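
        # Worked example with made-up numbers: requesting
        # BE_MEMORY = 4096 MB while the instance currently uses 1024 MB and
        # the primary node reports 2048 MB free gives
        # miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so the change is refused.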
9908

    
9909
      if be_new[constants.BE_AUTO_BALANCE]:
9910
        for node, nres in nodeinfo.items():
9911
          if node not in instance.secondary_nodes:
9912
            continue
9913
          msg = nres.fail_msg
9914
          if msg:
9915
            self.warn.append("Can't get info from secondary node %s: %s" %
9916
                             (node, msg))
9917
          elif not isinstance(nres.payload.get('memory_free', None), int):
9918
            self.warn.append("Secondary node %s didn't return free"
9919
                             " memory information" % node)
9920
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9921
            self.warn.append("Not enough memory to failover instance to"
9922
                             " secondary node %s" % node)
9923

    
9924
    # NIC processing
9925
    self.nic_pnew = {}
9926
    self.nic_pinst = {}
9927
    for nic_op, nic_dict in self.op.nics:
9928
      if nic_op == constants.DDM_REMOVE:
9929
        if not instance.nics:
9930
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9931
                                     errors.ECODE_INVAL)
9932
        continue
9933
      if nic_op != constants.DDM_ADD:
9934
        # an existing nic
9935
        if not instance.nics:
9936
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9937
                                     " no NICs" % nic_op,
9938
                                     errors.ECODE_INVAL)
9939
        if nic_op < 0 or nic_op >= len(instance.nics):
9940
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9941
                                     " are 0 to %d" %
9942
                                     (nic_op, len(instance.nics) - 1),
9943
                                     errors.ECODE_INVAL)
9944
        old_nic_params = instance.nics[nic_op].nicparams
9945
        old_nic_ip = instance.nics[nic_op].ip
9946
      else:
9947
        old_nic_params = {}
9948
        old_nic_ip = None
9949

    
9950
      update_params_dict = dict([(key, nic_dict[key])
9951
                                 for key in constants.NICS_PARAMETERS
9952
                                 if key in nic_dict])
9953

    
9954
      if 'bridge' in nic_dict:
9955
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9956

    
9957
      new_nic_params = _GetUpdatedParams(old_nic_params,
9958
                                         update_params_dict)
9959
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9960
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9961
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9962
      self.nic_pinst[nic_op] = new_nic_params
9963
      self.nic_pnew[nic_op] = new_filled_nic_params
9964
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9965

    
9966
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9967
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9968
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9969
        if msg:
9970
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9971
          if self.op.force:
9972
            self.warn.append(msg)
9973
          else:
9974
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9975
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9976
        if constants.INIC_IP in nic_dict:
9977
          nic_ip = nic_dict[constants.INIC_IP]
9978
        else:
9979
          nic_ip = old_nic_ip
9980
        if nic_ip is None:
9981
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9982
                                     ' on a routed nic', errors.ECODE_INVAL)
9983
      if constants.INIC_MAC in nic_dict:
9984
        nic_mac = nic_dict[constants.INIC_MAC]
9985
        if nic_mac is None:
9986
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9987
                                     errors.ECODE_INVAL)
9988
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9989
          # otherwise generate the mac
9990
          nic_dict[constants.INIC_MAC] = \
9991
            self.cfg.GenerateMAC(self.proc.GetECId())
9992
        else:
9993
          # or validate/reserve the current one
9994
          try:
9995
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9996
          except errors.ReservationError:
9997
            raise errors.OpPrereqError("MAC address %s already in use"
9998
                                       " in cluster" % nic_mac,
9999
                                       errors.ECODE_NOTUNIQUE)
10000

    
10001
    # DISK processing
10002
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10003
      raise errors.OpPrereqError("Disk operations not supported for"
10004
                                 " diskless instances",
10005
                                 errors.ECODE_INVAL)
10006
    for disk_op, _ in self.op.disks:
10007
      if disk_op == constants.DDM_REMOVE:
10008
        if len(instance.disks) == 1:
10009
          raise errors.OpPrereqError("Cannot remove the last disk of"
10010
                                     " an instance", errors.ECODE_INVAL)
10011
        _CheckInstanceDown(self, instance, "cannot remove disks")
10012

    
10013
      if (disk_op == constants.DDM_ADD and
10014
          len(instance.disks) >= constants.MAX_DISKS):
10015
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10016
                                   " add more" % constants.MAX_DISKS,
10017
                                   errors.ECODE_STATE)
10018
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10019
        # an existing disk
10020
        if disk_op < 0 or disk_op >= len(instance.disks):
10021
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10022
                                     " are 0 to %d" %
10023
                                     (disk_op, len(instance.disks)),
10024
                                     errors.ECODE_INVAL)
10025

    
10026
    return
10027

    
10028
  def _ConvertPlainToDrbd(self, feedback_fn):
10029
    """Converts an instance from plain to drbd.
10030

10031
    """
10032
    feedback_fn("Converting template to drbd")
10033
    instance = self.instance
10034
    pnode = instance.primary_node
10035
    snode = self.op.remote_node
10036

    
10037
    # create a fake disk info for _GenerateDiskTemplate
10038
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode}
10039
                 for d in instance.disks]
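    # disk_info above might look like (sizes are illustrative):
    #   [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR}]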
10040
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10041
                                      instance.name, pnode, [snode],
10042
                                      disk_info, None, None, 0, feedback_fn)
10043
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
10046
    for disk in new_disks:
10047
      # unfortunately this is... not too nice
10048
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10049
                            info, True)
10050
      for child in disk.children:
10051
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10052
    # at this stage, all new LVs have been created, we can rename the
10053
    # old ones
10054
    feedback_fn("Renaming original volumes...")
10055
    rename_list = [(o, n.children[0].logical_id)
10056
                   for (o, n) in zip(instance.disks, new_disks)]
10057
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10058
    result.Raise("Failed to rename original LVs")
10059

    
10060
    feedback_fn("Initializing DRBD devices...")
10061
    # all child devices are in place, we can now create the DRBD devices
10062
    for disk in new_disks:
10063
      for node in [pnode, snode]:
10064
        f_create = node == pnode
10065
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10066

    
10067
    # at this point, the instance has been modified
10068
    instance.disk_template = constants.DT_DRBD8
10069
    instance.disks = new_disks
10070
    self.cfg.Update(instance, feedback_fn)
10071

    
10072
    # disks are created, waiting for sync
10073
    disk_abort = not _WaitForSync(self, instance)
10074
    if disk_abort:
10075
      raise errors.OpExecError("There are some degraded disks for"
10076
                               " this instance, please cleanup manually")
10077

    
10078
  def _ConvertDrbdToPlain(self, feedback_fn):
10079
    """Converts an instance from drbd to plain.
10080

10081
    """
10082
    instance = self.instance
10083
    assert len(instance.secondary_nodes) == 1
10084
    pnode = instance.primary_node
10085
    snode = instance.secondary_nodes[0]
10086
    feedback_fn("Converting template to plain")
10087

    
10088
    old_disks = instance.disks
10089
    new_disks = [d.children[0] for d in old_disks]
10090

    
10091
    # copy over size and mode
10092
    for parent, child in zip(old_disks, new_disks):
10093
      child.size = parent.size
10094
      child.mode = parent.mode
10095

    
10096
    # update instance structure
10097
    instance.disks = new_disks
10098
    instance.disk_template = constants.DT_PLAIN
10099
    self.cfg.Update(instance, feedback_fn)
10100

    
10101
    feedback_fn("Removing volumes on the secondary node...")
10102
    for disk in old_disks:
10103
      self.cfg.SetDiskID(disk, snode)
10104
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10105
      if msg:
10106
        self.LogWarning("Could not remove block device %s on node %s,"
10107
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10108

    
10109
    feedback_fn("Removing unneeded volumes on the primary node...")
10110
    for idx, disk in enumerate(old_disks):
10111
      meta = disk.children[1]
10112
      self.cfg.SetDiskID(meta, pnode)
10113
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10114
      if msg:
10115
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10116
                        " continuing anyway: %s", idx, pnode, msg)
10117

    
10118
  def Exec(self, feedback_fn):
10119
    """Modifies an instance.
10120

10121
    All parameters take effect only at the next restart of the instance.
10122

10123
    """
10124
    # Process here the warnings from CheckPrereq, as we don't have a
10125
    # feedback_fn there.
10126
    for warn in self.warn:
10127
      feedback_fn("WARNING: %s" % warn)
10128

    
10129
    result = []
10130
    instance = self.instance
10131
    # disk changes
10132
    for disk_op, disk_dict in self.op.disks:
10133
      if disk_op == constants.DDM_REMOVE:
10134
        # remove the last disk
10135
        device = instance.disks.pop()
10136
        device_idx = len(instance.disks)
10137
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10138
          self.cfg.SetDiskID(disk, node)
10139
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10140
          if msg:
10141
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10142
                            " continuing anyway", device_idx, node, msg)
10143
        result.append(("disk/%d" % device_idx, "remove"))
10144
      elif disk_op == constants.DDM_ADD:
10145
        # add a new disk
10146
        if instance.disk_template in (constants.DT_FILE,
10147
                                        constants.DT_SHARED_FILE):
10148
          file_driver, file_path = instance.disks[0].logical_id
10149
          file_path = os.path.dirname(file_path)
10150
        else:
10151
          file_driver = file_path = None
10152
        disk_idx_base = len(instance.disks)
10153
        new_disk = _GenerateDiskTemplate(self,
10154
                                         instance.disk_template,
10155
                                         instance.name, instance.primary_node,
10156
                                         instance.secondary_nodes,
10157
                                         [disk_dict],
10158
                                         file_path,
10159
                                         file_driver,
10160
                                         disk_idx_base, feedback_fn)[0]
10161
        instance.disks.append(new_disk)
10162
        info = _GetInstanceInfoText(instance)
10163

    
10164
        logging.info("Creating volume %s for instance %s",
10165
                     new_disk.iv_name, instance.name)
10166
        # Note: this needs to be kept in sync with _CreateDisks
10167
        #HARDCODE
10168
        for node in instance.all_nodes:
10169
          f_create = node == instance.primary_node
10170
          try:
10171
            _CreateBlockDev(self, node, instance, new_disk,
10172
                            f_create, info, f_create)
10173
          except errors.OpExecError, err:
10174
            self.LogWarning("Failed to create volume %s (%s) on"
10175
                            " node %s: %s",
10176
                            new_disk.iv_name, new_disk, node, err)
10177
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10178
                       (new_disk.size, new_disk.mode)))
10179
      else:
10180
        # change a given disk
10181
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10182
        result.append(("disk.mode/%d" % disk_op,
10183
                       disk_dict[constants.IDISK_MODE]))
10184

    
10185
    if self.op.disk_template:
10186
      r_shut = _ShutdownInstanceDisks(self, instance)
10187
      if not r_shut:
10188
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10189
                                 " proceed with disk template conversion")
10190
      mode = (instance.disk_template, self.op.disk_template)
10191
      try:
10192
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10193
      except:
10194
        self.cfg.ReleaseDRBDMinors(instance.name)
10195
        raise
10196
      result.append(("disk_template", self.op.disk_template))
10197

    
10198
    # NIC changes
10199
    for nic_op, nic_dict in self.op.nics:
10200
      if nic_op == constants.DDM_REMOVE:
10201
        # remove the last nic
10202
        del instance.nics[-1]
10203
        result.append(("nic.%d" % len(instance.nics), "remove"))
10204
      elif nic_op == constants.DDM_ADD:
10205
        # mac and bridge should be set, by now
10206
        mac = nic_dict[constants.INIC_MAC]
10207
        ip = nic_dict.get(constants.INIC_IP, None)
10208
        nicparams = self.nic_pinst[constants.DDM_ADD]
10209
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10210
        instance.nics.append(new_nic)
10211
        result.append(("nic.%d" % (len(instance.nics) - 1),
10212
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10213
                       (new_nic.mac, new_nic.ip,
10214
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10215
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10216
                       )))
10217
      else:
10218
        for key in (constants.INIC_MAC, constants.INIC_IP):
10219
          if key in nic_dict:
10220
            setattr(instance.nics[nic_op], key, nic_dict[key])
10221
        if nic_op in self.nic_pinst:
10222
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10223
        for key, val in nic_dict.iteritems():
10224
          result.append(("nic.%s/%d" % (key, nic_op), val))
10225

    
10226
    # hvparams changes
10227
    if self.op.hvparams:
10228
      instance.hvparams = self.hv_inst
10229
      for key, val in self.op.hvparams.iteritems():
10230
        result.append(("hv/%s" % key, val))
10231

    
10232
    # beparams changes
10233
    if self.op.beparams:
10234
      instance.beparams = self.be_inst
10235
      for key, val in self.op.beparams.iteritems():
10236
        result.append(("be/%s" % key, val))
10237

    
10238
    # OS change
10239
    if self.op.os_name:
10240
      instance.os = self.op.os_name
10241

    
10242
    # osparams changes
10243
    if self.op.osparams:
10244
      instance.osparams = self.os_inst
10245
      for key, val in self.op.osparams.iteritems():
10246
        result.append(("os/%s" % key, val))
10247

    
10248
    self.cfg.Update(instance, feedback_fn)
10249

    
10250
    return result
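
  # The value returned by Exec is a list of (parameter, new value) pairs, for
  # example (hypothetical values): [("disk/1", "add:size=1024,mode=rw"),
  # ("be/memory", 512), ("nic.ip/0", "198.51.100.10")].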
10251

    
10252
  _DISK_CONVERSIONS = {
10253
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10254
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10255
    }
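
  # Exec() looks up disk template conversions in this map, keyed by
  # (old template, new template); e.g. a plain-to-drbd request resolves to
  # self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)], which is
  # _ConvertPlainToDrbd. Pairs not listed here are rejected in CheckPrereq.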
10256

    
10257

    
10258
class LUBackupQuery(NoHooksLU):
  """Query the exports list.

  """
  REQ_BGL = False
10263

    
10264
  def ExpandNames(self):
10265
    self.needed_locks = {}
10266
    self.share_locks[locking.LEVEL_NODE] = 1
10267
    if not self.op.nodes:
10268
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10269
    else:
10270
      self.needed_locks[locking.LEVEL_NODE] = \
10271
        _GetWantedNodes(self, self.op.nodes)
10272

    
10273
  def Exec(self, feedback_fn):
10274
    """Compute the list of all the exported system images.
10275

10276
    @rtype: dict
10277
    @return: a dictionary with the structure node->(export-list)
10278
        where export-list is a list of the instances exported on
10279
        that node.
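
        For example (illustrative names only), the result could look like
        {"node1.example.com": ["instance1.example.com"],
        "node2.example.com": False}, where False marks a node whose export
        list could not be retrieved.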
10280

10281
    """
10282
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10283
    rpcresult = self.rpc.call_export_list(self.nodes)
10284
    result = {}
10285
    for node in rpcresult:
10286
      if rpcresult[node].fail_msg:
10287
        result[node] = False
10288
      else:
10289
        result[node] = rpcresult[node].payload
10290

    
10291
    return result
10292

    
10293

    
10294
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False
10299

    
10300
  def ExpandNames(self):
10301
    self._ExpandAndLockInstance()
10302

    
10303
  def CheckPrereq(self):
10304
    """Check prerequisites.
10305

10306
    """
10307
    instance_name = self.op.instance_name
10308

    
10309
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10310
    assert self.instance is not None, \
10311
          "Cannot retrieve locked instance %s" % self.op.instance_name
10312
    _CheckNodeOnline(self, self.instance.primary_node)
10313

    
10314
    self._cds = _GetClusterDomainSecret()
10315

    
10316
  def Exec(self, feedback_fn):
10317
    """Prepares an instance for an export.
10318

10319
    """
10320
    instance = self.instance
10321

    
10322
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10323
      salt = utils.GenerateSecret(8)
10324

    
10325
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10326
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10327
                                              constants.RIE_CERT_VALIDITY)
10328
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10329

    
10330
      (name, cert_pem) = result.payload
10331

    
10332
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10333
                                             cert_pem)
10334

    
10335
      return {
10336
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10337
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10338
                          salt),
10339
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10340
        }
10341

    
10342
    return None
10343

    
10344

    
10345
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
10352

    
10353
  def CheckArguments(self):
10354
    """Check the arguments.
10355

10356
    """
10357
    self.x509_key_name = self.op.x509_key_name
10358
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10359

    
10360
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10361
      if not self.x509_key_name:
10362
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10363
                                   errors.ECODE_INVAL)
10364

    
10365
      if not self.dest_x509_ca_pem:
10366
        raise errors.OpPrereqError("Missing destination X509 CA",
10367
                                   errors.ECODE_INVAL)
10368

    
10369
  def ExpandNames(self):
10370
    self._ExpandAndLockInstance()
10371

    
10372
    # Lock all nodes for local exports
10373
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10383

    
10384
  def DeclareLocks(self, level):
10385
    """Last minute lock declaration."""
10386
    # All nodes are locked anyway, so nothing to do here.
10387

    
10388
  def BuildHooksEnv(self):
10389
    """Build hooks env.
10390

10391
    This will run on the master, primary node and target node.
10392

10393
    """
10394
    env = {
10395
      "EXPORT_MODE": self.op.mode,
10396
      "EXPORT_NODE": self.op.target_node,
10397
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10398
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10399
      # TODO: Generic function for boolean env variables
10400
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10401
      }
10402

    
10403
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10404

    
10405
    return env
10406

    
10407
  def BuildHooksNodes(self):
10408
    """Build hooks nodes.
10409

10410
    """
10411
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10412

    
10413
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10414
      nl.append(self.op.target_node)
10415

    
10416
    return (nl, nl)
10417

    
10418
  def CheckPrereq(self):
10419
    """Check prerequisites.
10420

10421
    This checks that the instance and node names are valid.
10422

10423
    """
10424
    instance_name = self.op.instance_name
10425

    
10426
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10427
    assert self.instance is not None, \
10428
          "Cannot retrieve locked instance %s" % self.op.instance_name
10429
    _CheckNodeOnline(self, self.instance.primary_node)
10430

    
10431
    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")
10435

    
10436
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10437
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10438
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10439
      assert self.dst_node is not None
10440

    
10441
      _CheckNodeOnline(self, self.dst_node.name)
10442
      _CheckNodeNotDrained(self, self.dst_node.name)
10443

    
10444
      self._cds = None
10445
      self.dest_disk_info = None
10446
      self.dest_x509_ca = None
10447

    
10448
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10449
      self.dst_node = None
10450

    
10451
      if len(self.op.target_node) != len(self.instance.disks):
10452
        raise errors.OpPrereqError(("Received destination information for %s"
10453
                                    " disks, but instance %s has %s disks") %
10454
                                   (len(self.op.target_node), instance_name,
10455
                                    len(self.instance.disks)),
10456
                                   errors.ECODE_INVAL)
10457

    
10458
      cds = _GetClusterDomainSecret()
10459

    
10460
      # Check X509 key name
10461
      try:
10462
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10463
      except (TypeError, ValueError), err:
10464
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10465

    
10466
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10467
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10468
                                   errors.ECODE_INVAL)
10469

    
10470
      # Load and verify CA
10471
      try:
10472
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10473
      except OpenSSL.crypto.Error, err:
10474
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10475
                                   (err, ), errors.ECODE_INVAL)
10476

    
10477
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10478
      if errcode is not None:
10479
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10480
                                   (msg, ), errors.ECODE_INVAL)
10481

    
10482
      self.dest_x509_ca = cert
10483

    
10484
      # Verify target information
10485
      disk_info = []
10486
      for idx, disk_data in enumerate(self.op.target_node):
10487
        try:
10488
          (host, port, magic) = \
10489
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10490
        except errors.GenericError, err:
10491
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10492
                                     (idx, err), errors.ECODE_INVAL)
10493

    
10494
        disk_info.append((host, port, magic))
10495

    
10496
      assert len(disk_info) == len(self.op.target_node)
10497
      self.dest_disk_info = disk_info
10498

    
10499
    else:
10500
      raise errors.ProgrammerError("Unhandled export mode %r" %
10501
                                   self.op.mode)
10502

    
10503
    # instance disk type verification
10504
    # TODO: Implement export support for file-based disks
10505
    for disk in self.instance.disks:
10506
      if disk.dev_type == constants.LD_FILE:
10507
        raise errors.OpPrereqError("Export not supported for instances with"
10508
                                   " file-based disks", errors.ECODE_INVAL)
10509

    
10510
  def _CleanupExports(self, feedback_fn):
10511
    """Removes exports of current instance from all other nodes.
10512

10513
    If an instance in a cluster with nodes A..D was exported to node C, its
10514
    exports will be removed from the nodes A, B and D.
10515

10516
    """
10517
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10518

    
10519
    nodelist = self.cfg.GetNodeList()
10520
    nodelist.remove(self.dst_node.name)
10521

    
10522
    # on one-node clusters nodelist will be empty after the removal
10523
    # if we proceed the backup would be removed because OpBackupQuery
10524
    # substitutes an empty list with the full cluster node list.
10525
    iname = self.instance.name
10526
    if nodelist:
10527
      feedback_fn("Removing old exports for instance %s" % iname)
10528
      exportlist = self.rpc.call_export_list(nodelist)
10529
      for node in exportlist:
10530
        if exportlist[node].fail_msg:
10531
          continue
10532
        if iname in exportlist[node].payload:
10533
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10534
          if msg:
10535
            self.LogWarning("Could not remove older export for instance %s"
10536
                            " on node %s: %s", iname, node, msg)
10537

    
10538
  def Exec(self, feedback_fn):
10539
    """Export an instance to an image in the cluster.
10540

10541
    """
10542
    assert self.op.mode in constants.EXPORT_MODES
10543

    
10544
    instance = self.instance
10545
    src_node = instance.primary_node
10546

    
10547
    if self.op.shutdown:
10548
      # shutdown the instance, but not the disks
10549
      feedback_fn("Shutting down instance %s" % instance.name)
10550
      result = self.rpc.call_instance_shutdown(src_node, instance,
10551
                                               self.op.shutdown_timeout)
10552
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10553
      result.Raise("Could not shutdown instance %s on"
10554
                   " node %s" % (instance.name, src_node))
10555

    
10556
    # set the disks ID correctly since call_instance_start needs the
10557
    # correct drbd minor to create the symlinks
10558
    for disk in instance.disks:
10559
      self.cfg.SetDiskID(disk, src_node)
10560

    
10561
    activate_disks = (not instance.admin_up)
10562

    
10563
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)
10567

    
10568
    try:
10569
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10570
                                                     instance)
10571

    
10572
      helper.CreateSnapshots()
10573
      try:
10574
        if (self.op.shutdown and instance.admin_up and
10575
            not self.op.remove_instance):
10576
          assert not activate_disks
10577
          feedback_fn("Starting instance %s" % instance.name)
10578
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10579
          msg = result.fail_msg
10580
          if msg:
10581
            feedback_fn("Failed to start instance: %s" % msg)
10582
            _ShutdownInstanceDisks(self, instance)
10583
            raise errors.OpExecError("Could not start instance: %s" % msg)
10584

    
10585
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10586
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10587
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10588
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10589
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10590

    
10591
          (key_name, _, _) = self.x509_key_name
10592

    
10593
          dest_ca_pem = \
10594
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10595
                                            self.dest_x509_ca)
10596

    
10597
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10598
                                                     key_name, dest_ca_pem,
10599
                                                     timeouts)
10600
      finally:
10601
        helper.Cleanup()
10602

    
10603
      # Check for backwards compatibility
10604
      assert len(dresults) == len(instance.disks)
10605
      assert compat.all(isinstance(i, bool) for i in dresults), \
10606
             "Not all results are boolean: %r" % dresults
10607

    
10608
    finally:
10609
      if activate_disks:
10610
        feedback_fn("Deactivating disks for %s" % instance.name)
10611
        _ShutdownInstanceDisks(self, instance)
10612

    
10613
    if not (compat.all(dresults) and fin_resu):
10614
      failures = []
10615
      if not fin_resu:
10616
        failures.append("export finalization")
10617
      if not compat.all(dresults):
10618
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10619
                               if not dsk)
10620
        failures.append("disk export: disk(s) %s" % fdsk)
10621

    
10622
      raise errors.OpExecError("Export failed, errors in %s" %
10623
                               utils.CommaJoin(failures))
10624

    
10625
    # At this point, the export was successful, we can cleanup/finish
10626

    
10627
    # Remove instance if requested
10628
    if self.op.remove_instance:
10629
      feedback_fn("Removing instance %s" % instance.name)
10630
      _RemoveInstance(self, feedback_fn, instance,
10631
                      self.op.ignore_remove_failures)
10632

    
10633
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10634
      self._CleanupExports(feedback_fn)
10635

    
10636
    return fin_resu, dresults
10637

    
10638

    
10639
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False
10644

    
10645
  def ExpandNames(self):
10646
    self.needed_locks = {}
10647
    # We need all nodes to be locked in order for RemoveExport to work, but we
10648
    # don't need to lock the instance itself, as nothing will happen to it (and
10649
    # we can remove exports also for a removed instance)
10650
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10651

    
10652
  def Exec(self, feedback_fn):
10653
    """Remove any export.
10654

10655
    """
10656
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10657
    # If the instance was not found we'll try with the name that was passed in.
10658
    # This will only work if it was an FQDN, though.
10659
    fqdn_warn = False
10660
    if not instance_name:
10661
      fqdn_warn = True
10662
      instance_name = self.op.instance_name
10663

    
10664
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10665
    exportlist = self.rpc.call_export_list(locked_nodes)
10666
    found = False
10667
    for node in exportlist:
10668
      msg = exportlist[node].fail_msg
10669
      if msg:
10670
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10671
        continue
10672
      if instance_name in exportlist[node].payload:
10673
        found = True
10674
        result = self.rpc.call_export_remove(node, instance_name)
10675
        msg = result.fail_msg
10676
        if msg:
10677
          logging.error("Could not remove export for instance %s"
10678
                        " on node %s: %s", instance_name, node, msg)
10679

    
10680
    if fqdn_warn and not found:
10681
      feedback_fn("Export not found. If trying to remove an export belonging"
10682
                  " to a deleted instance please use its Fully Qualified"
10683
                  " Domain Name.")
10684

    
10685

    
10686
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False
10693

    
10694
  def ExpandNames(self):
10695
    # We need the new group's UUID here so that we can create and acquire the
10696
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10697
    # that it should not check whether the UUID exists in the configuration.
10698
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10699
    self.needed_locks = {}
10700
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10701

    
10702
  def CheckPrereq(self):
10703
    """Check prerequisites.
10704

10705
    This checks that the given group name is not an existing node group
10706
    already.
10707

10708
    """
10709
    try:
10710
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10711
    except errors.OpPrereqError:
10712
      pass
10713
    else:
10714
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10715
                                 " node group (UUID: %s)" %
10716
                                 (self.op.group_name, existing_uuid),
10717
                                 errors.ECODE_EXISTS)
10718

    
10719
    if self.op.ndparams:
10720
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10721

    
10722
  def BuildHooksEnv(self):
10723
    """Build hooks env.
10724

10725
    """
10726
    return {
10727
      "GROUP_NAME": self.op.group_name,
10728
      }
10729

    
10730
  def BuildHooksNodes(self):
10731
    """Build hooks nodes.
10732

10733
    """
10734
    mn = self.cfg.GetMasterNode()
10735
    return ([mn], [mn])
10736

    
10737
  def Exec(self, feedback_fn):
10738
    """Add the node group to the cluster.
10739

10740
    """
10741
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10742
                                  uuid=self.group_uuid,
10743
                                  alloc_policy=self.op.alloc_policy,
10744
                                  ndparams=self.op.ndparams)
10745

    
10746
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10747
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10748

    
10749

    
10750
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False
10755

    
10756
  def ExpandNames(self):
10757
    # These raise errors.OpPrereqError on their own:
10758
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10759
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10760

    
10761
    # We want to lock all the affected nodes and groups. We have readily
10762
    # available the list of nodes, and the *destination* group. To gather the
10763
    # list of "source" groups, we need to fetch node information.
10764
    self.node_data = self.cfg.GetAllNodesInfo()
10765
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10766
    affected_groups.add(self.group_uuid)
10767

    
10768
    self.needed_locks = {
10769
      locking.LEVEL_NODEGROUP: list(affected_groups),
10770
      locking.LEVEL_NODE: self.op.nodes,
10771
      }
10772

    
10773
  def CheckPrereq(self):
10774
    """Check prerequisites.
10775

10776
    """
10777
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10778
    instance_data = self.cfg.GetAllInstancesInfo()
10779

    
10780
    if self.group is None:
10781
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10782
                               (self.op.group_name, self.group_uuid))
10783

    
10784
    (new_splits, previous_splits) = \
10785
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10786
                                             for node in self.op.nodes],
10787
                                            self.node_data, instance_data)
10788

    
10789
    if new_splits:
10790
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10791

    
10792
      if not self.op.force:
10793
        raise errors.OpExecError("The following instances get split by this"
10794
                                 " change and --force was not given: %s" %
10795
                                 fmt_new_splits)
10796
      else:
10797
        self.LogWarning("This operation will split the following instances: %s",
10798
                        fmt_new_splits)
10799

    
10800
        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
10804

    
10805
  def Exec(self, feedback_fn):
10806
    """Assign nodes to a new group.
10807

10808
    """
10809
    for node in self.op.nodes:
10810
      self.node_data[node].group = self.group_uuid
10811

    
10812
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10813

    
10814
  @staticmethod
10815
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10816
    """Check for split instances after a node assignment.
10817

10818
    This method considers a series of node assignments as an atomic operation,
10819
    and returns information about split instances after applying the set of
10820
    changes.
10821

10822
    In particular, it returns information about newly split instances, and
10823
    instances that were already split, and remain so after the change.
10824

10825
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10826
    considered.
10827

10828
    @type changes: list of (node_name, new_group_uuid) pairs.
10829
    @param changes: list of node assignments to consider.
10830
    @param node_data: a dict with data for all nodes
10831
    @param instance_data: a dict with all instances to consider
10832
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.
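
    Example (illustrative): for a DRBD instance whose primary and secondary
    node both live in group A, reassigning exactly one of those nodes to
    group B makes the instance show up in the first list (newly split). An
    instance appears in the second list only if its nodes were already in
    different groups before the change and remain so afterwards.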
10836

10837
    """
10838
    changed_nodes = dict((node, group) for node, group in changes
10839
                         if node_data[node].group != group)
10840

    
10841
    all_split_instances = set()
10842
    previously_split_instances = set()
10843

    
10844
    def InstanceNodes(instance):
10845
      return [instance.primary_node] + list(instance.secondary_nodes)
10846

    
10847
    for inst in instance_data.values():
10848
      if inst.disk_template not in constants.DTS_INT_MIRROR:
10849
        continue
10850

    
10851
      instance_nodes = InstanceNodes(inst)
10852

    
10853
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
10854
        previously_split_instances.add(inst.name)
10855

    
10856
      if len(set(changed_nodes.get(node, node_data[node].group)
10857
                 for node in instance_nodes)) > 1:
10858
        all_split_instances.add(inst.name)
10859

    
10860
    return (list(all_split_instances - previously_split_instances),
10861
            list(previously_split_instances & all_split_instances))
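  # Illustrative sketch (not part of the module): how the split check above
  # behaves for a two-node mirrored instance. The stub classes are
  # hypothetical stand-ins providing only the attributes the static method
  # reads; constants.DT_DRBD8 is assumed to be in constants.DTS_INT_MIRROR.
  #
  #   class _StubNode:
  #     def __init__(self, group): self.group = group
  #   class _StubInst:
  #     def __init__(self, name, pnode, snodes):
  #       self.name = name
  #       self.disk_template = constants.DT_DRBD8
  #       self.primary_node = pnode
  #       self.secondary_nodes = snodes
  #
  #   nodes = {"n1": _StubNode("g1"), "n2": _StubNode("g1")}
  #   insts = {"i1": _StubInst("i1", "n1", ["n2"])}
  #   # Moving only n1 to group g2 splits i1 across g1/g2:
  #   CheckAssignmentForSplitInstances([("n1", "g2")], nodes, insts)
  #   # -> (["i1"], [])   newly split, nothing was split before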


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
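  # Illustrative sketch of the mappings built above (hypothetical names): with
  # two wanted groups g1/g2, a node "node1" in g1 running "inst1", the method
  # hands query.GroupQueryData something like
  #   group_to_nodes     = {"g1": ["node1"], "g2": []}
  #   group_to_instances = {"g1": ["inst1"], "g2": []}
  # (instances are keyed by the group of their primary node only).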


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
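  # Illustrative sketch: with new ndparams the method returns a list of
  # (parameter name, new value) pairs for display, e.g. (hypothetical value)
  #   [("ndparams", "{'oob_program': '/bin/true'}")]
  # while an alloc_policy change is applied but not echoed back here.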



class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
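  # Illustrative sketch (hypothetical tag names): searching with the pattern
  # "^db" against a cluster where instance "inst1" carries the tag "dbserver"
  # would yield
  #   [("/instances/inst1", "dbserver")]
  # i.e. (path, tag) pairs for every cluster, node and instance tag matching
  # the regular expression.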


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
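  # Illustrative sketch (not part of the LU): what a test client on the other
  # end of _NotifyUsingSocket is expected to do. "sockname" below is a
  # placeholder for the socket path delivered through the callback/log message.
  #
  #   import socket
  #   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   s.connect(sockname)   # must happen within _CLIENT_CONNECT_TIMEOUT seconds
  #   ...                   # run whatever checks the test needs
  #   s.close()             # EOF on the recv(1) above confirms the notification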

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute static (configuration-derived) node data.

    @rtype: dict
    @return: a dict mapping node names to dicts of config-based attributes

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute dynamic (runtime) node data on top of the static data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
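  # Rough shape of the serialized input handed to the external allocator
  # script (an illustrative sketch; the authoritative layout is whatever
  # _ComputeClusterData and the _Add* methods above produce):
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "...",
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodegroups": {group_uuid: {"name": ..., "alloc_policy": ...}},
  #     "nodes": {node_name: {...static and runtime attributes...}},
  #     "instances": {instance_name: {...}},
  #     "request": {"type": mode, ...mode-specific keys...},
  #   }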

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
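  # A minimal reply that passes the validation above (illustrative only; a
  # real allocator would fill in meaningful data):
  #
  #   {"success": true, "info": "allocation OK", "result": ["node1", "node2"]}
  #
  # Legacy scripts that still return the old "nodes" key have it rewritten
  # into "result" before the checks run.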


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
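# Illustrative sketch (not part of the module): resolving and running a query
# implementation for node groups. "lu" stands for a LogicalUnit-like object
# providing cfg; the constructor arguments mirror LUGroupQuery.CheckArguments.
#
#   impl_cls = _GetQueryImplementation(constants.QR_GROUP)   # -> _GroupQuery
#   gq = impl_cls(qlang.MakeSimpleFilter("name", ["default"]), ["name"], False)
#   gq.ExpandNames(lu)
#   data = gq.OldStyleQuery(lu)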