1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60

    
61
import ganeti.masterd.instance # pylint: disable-msg=W0611
62

    
63

    
64
def _SupportsOob(cfg, node):
65
  """Tells if node supports OOB.
66

67
  @type cfg: L{config.ConfigWriter}
68
  @param cfg: The cluster configuration
69
  @type node: L{objects.Node}
70
  @param node: The node
71
  @return: The OOB script if supported or an empty string otherwise
72

73
  """
74
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
75

    
76

    
77
class ResultWithJobs:
78
  """Data container for LU results with jobs.
79

80
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
81
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
82
  contained in the C{jobs} attribute and include the job IDs in the opcode
83
  result.
84

85
  """
86
  def __init__(self, jobs, **kwargs):
87
    """Initializes this class.
88

89
    Additional return values can be specified as keyword arguments.
90

91
    @type jobs: list of lists of L{opcodes.OpCode}
92
    @param jobs: A list of lists of opcode objects
93

94
    """
95
    self.jobs = jobs
96
    self.other = kwargs
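  # Illustrative sketch (not part of the original module): an LU's Exec can
  # hand follow-up work to the master daemon by returning ResultWithJobs.
  # The instance name and the extra "message" keyword are made-up examples.
  #
  #   def Exec(self, feedback_fn):
  #     jobs = [
  #       [opcodes.OpInstanceStartup(instance_name="instance1.example.com")],
  #       ]
  #     return ResultWithJobs(jobs, message="follow-up startup queued")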
97

    
98

    
99
class LogicalUnit(object):
100
  """Logical Unit base class.
101

102
  Subclasses must follow these rules:
103
    - implement ExpandNames
104
    - implement CheckPrereq (except when tasklets are used)
105
    - implement Exec (except when tasklets are used)
106
    - implement BuildHooksEnv
107
    - implement BuildHooksNodes
108
    - redefine HPATH and HTYPE
109
    - optionally redefine their run requirements:
110
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
111

112
  Note that all commands require root permissions.
113

114
  @ivar dry_run_result: the value (if any) that will be returned to the caller
115
      in dry-run mode (signalled by opcode dry_run parameter)
116

117
  """
118
  HPATH = None
119
  HTYPE = None
120
  REQ_BGL = True
121

    
122
  def __init__(self, processor, op, context, rpc):
123
    """Constructor for LogicalUnit.
124

125
    This needs to be overridden in derived classes in order to check op
126
    validity.
127

128
    """
129
    self.proc = processor
130
    self.op = op
131
    self.cfg = context.cfg
132
    self.glm = context.glm
133
    self.context = context
134
    self.rpc = rpc
135
    # Dicts used to declare locking needs to mcpu
136
    self.needed_locks = None
137
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
138
    self.add_locks = {}
139
    self.remove_locks = {}
140
    # Used to force good behavior when calling helper functions
141
    self.recalculate_locks = {}
142
    # logging
143
    self.Log = processor.Log # pylint: disable-msg=C0103
144
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
145
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
146
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
147
    # support for dry-run
148
    self.dry_run_result = None
149
    # support for generic debug attribute
150
    if (not hasattr(self.op, "debug_level") or
151
        not isinstance(self.op.debug_level, int)):
152
      self.op.debug_level = 0
153

    
154
    # Tasklets
155
    self.tasklets = None
156

    
157
    # Validate opcode parameters and set defaults
158
    self.op.Validate(True)
159

    
160
    self.CheckArguments()
161

    
162
  def CheckArguments(self):
163
    """Check syntactic validity for the opcode arguments.
164

165
    This method is for doing a simple syntactic check and ensure
166
    validity of opcode parameters, without any cluster-related
167
    checks. While the same can be accomplished in ExpandNames and/or
168
    CheckPrereq, doing these separate is better because:
169

170
      - ExpandNames is left as purely a lock-related function
171
      - CheckPrereq is run after we have acquired locks (and possibly
172
        waited for them)
173

174
    The function is allowed to change the self.op attribute so that
175
    later methods no longer need to worry about missing parameters.
176

177
    """
178
    pass
179

    
180
  def ExpandNames(self):
181
    """Expand names for this LU.
182

183
    This method is called before starting to execute the opcode, and it should
184
    update all the parameters of the opcode to their canonical form (e.g. a
185
    short node name must be fully expanded after this method has successfully
186
    completed). This way locking, hooks, logging, etc. can work correctly.
187

188
    LUs which implement this method must also populate the self.needed_locks
189
    member, as a dict with lock levels as keys, and a list of needed lock names
190
    as values. Rules:
191

192
      - use an empty dict if you don't need any lock
193
      - if you don't need any lock at a particular level omit that level
194
      - don't put anything for the BGL level
195
      - if you want all locks at a level use locking.ALL_SET as a value
196

197
    If you need to share locks (rather than acquire them exclusively) at one
198
    level you can modify self.share_locks, setting a true value (usually 1) for
199
    that level. By default locks are not shared.
200

201
    This function can also define a list of tasklets, which then will be
202
    executed in order instead of the usual LU-level CheckPrereq and Exec
203
    functions, if those are not defined by the LU.
204

205
    Examples::
206

207
      # Acquire all nodes and one instance
208
      self.needed_locks = {
209
        locking.LEVEL_NODE: locking.ALL_SET,
210
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
211
      }
212
      # Acquire just two nodes
213
      self.needed_locks = {
214
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
215
      }
216
      # Acquire no locks
217
      self.needed_locks = {} # No, you can't leave it to the default value None
218

219
    """
220
    # The implementation of this method is mandatory only if the new LU is
221
    # concurrent, so that old LUs don't need to be changed all at the same
222
    # time.
223
    if self.REQ_BGL:
224
      self.needed_locks = {} # Exclusive LUs don't need locks.
225
    else:
226
      raise NotImplementedError
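  # Illustrative sketch (not part of the original module): a concurrent LU
  # (REQ_BGL = False) that wants all node locks in shared mode would override
  # ExpandNames roughly like this; the class name is hypothetical.
  #
  #   class LUExampleNodeReport(NoHooksLU):
  #     REQ_BGL = False
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
  #       self.share_locks[locking.LEVEL_NODE] = 1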
227

    
228
  def DeclareLocks(self, level):
229
    """Declare LU locking needs for a level
230

231
    While most LUs can just declare their locking needs at ExpandNames time,
232
    sometimes there's the need to calculate some locks after having acquired
233
    the ones before. This function is called just before acquiring locks at a
234
    particular level, but after acquiring the ones at lower levels, and permits
235
    such calculations. It can be used to modify self.needed_locks, and by
236
    default it does nothing.
237

238
    This function is only called if you have something already set in
239
    self.needed_locks for the level.
240

241
    @param level: Locking level which is going to be locked
242
    @type level: member of ganeti.locking.LEVELS
243

244
    """
245

    
246
  def CheckPrereq(self):
247
    """Check prerequisites for this LU.
248

249
    This method should check that the prerequisites for the execution
250
    of this LU are fulfilled. It can do internode communication, but
251
    it should be idempotent - no cluster or system changes are
252
    allowed.
253

254
    The method should raise errors.OpPrereqError in case something is
255
    not fulfilled. Its return value is ignored.
256

257
    This method should also update all the parameters of the opcode to
258
    their canonical form if it hasn't been done by ExpandNames before.
259

260
    """
261
    if self.tasklets is not None:
262
      for (idx, tl) in enumerate(self.tasklets):
263
        logging.debug("Checking prerequisites for tasklet %s/%s",
264
                      idx + 1, len(self.tasklets))
265
        tl.CheckPrereq()
266
    else:
267
      pass
268

    
269
  def Exec(self, feedback_fn):
270
    """Execute the LU.
271

272
    This method should implement the actual work. It should raise
273
    errors.OpExecError for failures that are somewhat dealt with in
274
    code, or expected.
275

276
    """
277
    if self.tasklets is not None:
278
      for (idx, tl) in enumerate(self.tasklets):
279
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
280
        tl.Exec(feedback_fn)
281
    else:
282
      raise NotImplementedError
283

    
284
  def BuildHooksEnv(self):
285
    """Build hooks environment for this LU.
286

287
    @rtype: dict
288
    @return: Dictionary containing the environment that will be used for
289
      running the hooks for this LU. The keys of the dict must not be prefixed
290
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
291
      will extend the environment with additional variables. If no environment
292
      should be defined, an empty dictionary should be returned (not C{None}).
293
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
294
      will not be called.
295

296
    """
297
    raise NotImplementedError
298

    
299
  def BuildHooksNodes(self):
300
    """Build list of nodes to run LU's hooks.
301

302
    @rtype: tuple; (list, list)
303
    @return: Tuple containing a list of node names on which the hook
304
      should run before the execution and a list of node names on which the
305
      hook should run after the execution. If there are no nodes, an empty
306
      list should be returned (and not None).
307
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
308
      will not be called.
309

310
    """
311
    raise NotImplementedError
312

    
313
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
314
    """Notify the LU about the results of its hooks.
315

316
    This method is called every time a hooks phase is executed, and notifies
317
    the Logical Unit about the hooks' result. The LU can then use it to alter
318
    its result based on the hooks.  By default the method does nothing and the
319
    previous result is passed back unchanged, but any LU can override it if it
320
    wants to use the local cluster hook-scripts somehow.
321

322
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
323
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
324
    @param hook_results: the results of the multi-node hooks rpc call
325
    @param feedback_fn: function used to send feedback back to the caller
326
    @param lu_result: the previous Exec result this LU had, or None
327
        in the PRE phase
328
    @return: the new Exec result, based on the previous result
329
        and hook results
330

331
    """
332
    # API must be kept, thus we ignore the unused-argument warning and the
333
    # method-could-be-a-function warning
334
    # pylint: disable-msg=W0613,R0201
335
    return lu_result
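  # Illustrative sketch (not part of the original module): an LU wanting to
  # react to its post hooks could override this method along these lines
  # (the feedback text is a made-up example):
  #
  #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  #     if phase == constants.HOOKS_PHASE_POST:
  #       feedback_fn("Post hooks ran on %d node(s)" % len(hook_results))
  #     return lu_result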
336

    
337
  def _ExpandAndLockInstance(self):
338
    """Helper function to expand and lock an instance.
339

340
    Many LUs that work on an instance take its name in self.op.instance_name
341
    and need to expand it and then declare the expanded name for locking. This
342
    function does it, and then updates self.op.instance_name to the expanded
343
    name. It also initializes needed_locks as a dict, if this hasn't been done
344
    before.
345

346
    """
347
    if self.needed_locks is None:
348
      self.needed_locks = {}
349
    else:
350
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
351
        "_ExpandAndLockInstance called with instance-level locks set"
352
    self.op.instance_name = _ExpandInstanceName(self.cfg,
353
                                                self.op.instance_name)
354
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
355

    
356
  def _LockInstancesNodes(self, primary_only=False):
357
    """Helper function to declare instances' nodes for locking.
358

359
    This function should be called after locking one or more instances to lock
360
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
361
    with all primary or secondary nodes for instances already locked and
362
    present in self.needed_locks[locking.LEVEL_INSTANCE].
363

364
    It should be called from DeclareLocks, and for safety only works if
365
    self.recalculate_locks[locking.LEVEL_NODE] is set.
366

367
    In the future it may grow parameters to lock only some instances' nodes, or
368
    to lock only primary or secondary nodes, if needed.
369

370
    It should be called in DeclareLocks in a way similar to::
371

372
      if level == locking.LEVEL_NODE:
373
        self._LockInstancesNodes()
374

375
    @type primary_only: boolean
376
    @param primary_only: only lock primary nodes of locked instances
377

378
    """
379
    assert locking.LEVEL_NODE in self.recalculate_locks, \
380
      "_LockInstancesNodes helper function called with no nodes to recalculate"
381

    
382
    # TODO: check if we've really been called with the instance locks held
383

    
384
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
385
    # future we might want to have different behaviors depending on the value
386
    # of self.recalculate_locks[locking.LEVEL_NODE]
387
    wanted_nodes = []
388
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
389
      instance = self.context.cfg.GetInstanceInfo(instance_name)
390
      wanted_nodes.append(instance.primary_node)
391
      if not primary_only:
392
        wanted_nodes.extend(instance.secondary_nodes)
393

    
394
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
395
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
396
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
397
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
398

    
399
    del self.recalculate_locks[locking.LEVEL_NODE]
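  # Illustrative sketch (not part of the original module): the usual pattern in
  # an instance-level LU combines _ExpandAndLockInstance with this helper; the
  # class name is hypothetical.
  #
  #   class LUExampleInstanceCheck(NoHooksLU):
  #     REQ_BGL = False
  #
  #     def ExpandNames(self):
  #       self._ExpandAndLockInstance()
  #       self.needed_locks[locking.LEVEL_NODE] = []
  #       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #     def DeclareLocks(self, level):
  #       if level == locking.LEVEL_NODE:
  #         self._LockInstancesNodes()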
400

    
401

    
402
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
403
  """Simple LU which runs no hooks.
404

405
  This LU is intended as a parent for other LogicalUnits which will
406
  run no hooks, in order to reduce duplicate code.
407

408
  """
409
  HPATH = None
410
  HTYPE = None
411

    
412
  def BuildHooksEnv(self):
413
    """Empty BuildHooksEnv for NoHooksLu.
414

415
    This just raises an error.
416

417
    """
418
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
419

    
420
  def BuildHooksNodes(self):
421
    """Empty BuildHooksNodes for NoHooksLU.
422

423
    """
424
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
425

    
426

    
427
class Tasklet:
428
  """Tasklet base class.
429

430
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
431
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
432
  tasklets know nothing about locks.
433

434
  Subclasses must follow these rules:
435
    - Implement CheckPrereq
436
    - Implement Exec
437

438
  """
439
  def __init__(self, lu):
440
    self.lu = lu
441

    
442
    # Shortcuts
443
    self.cfg = lu.cfg
444
    self.rpc = lu.rpc
445

    
446
  def CheckPrereq(self):
447
    """Check prerequisites for this tasklets.
448

449
    This method should check whether the prerequisites for the execution of
450
    this tasklet are fulfilled. It can do internode communication, but it
451
    should be idempotent - no cluster or system changes are allowed.
452

453
    The method should raise errors.OpPrereqError in case something is not
454
    fulfilled. Its return value is ignored.
455

456
    This method should also update all parameters to their canonical form if it
457
    hasn't been done before.
458

459
    """
460
    pass
461

    
462
  def Exec(self, feedback_fn):
463
    """Execute the tasklet.
464

465
    This method should implement the actual work. It should raise
466
    errors.OpExecError for failures that are somewhat dealt with in code, or
467
    expected.
468

469
    """
470
    raise NotImplementedError
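  # Illustrative sketch (not part of the original module): a minimal tasklet
  # and how an LU could install it; the class name and behaviour are made up.
  #
  #   class _ExampleNoopTasklet(Tasklet):
  #     def __init__(self, lu, instance_name):
  #       Tasklet.__init__(self, lu)
  #       self.instance_name = instance_name
  #
  #     def CheckPrereq(self):
  #       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("Nothing to do for %s" % self.instance_name)
  #
  # The owning LU would then set, typically in ExpandNames:
  #
  #   self.tasklets = [_ExampleNoopTasklet(self, self.op.instance_name)]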
471

    
472

    
473
class _QueryBase:
474
  """Base for query utility classes.
475

476
  """
477
  #: Attribute holding field definitions
478
  FIELDS = None
479

    
480
  def __init__(self, filter_, fields, use_locking):
481
    """Initializes this class.
482

483
    """
484
    self.use_locking = use_locking
485

    
486
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
487
                             namefield="name")
488
    self.requested_data = self.query.RequestedData()
489
    self.names = self.query.RequestedNames()
490

    
491
    # Sort only if no names were requested
492
    self.sort_by_name = not self.names
493

    
494
    self.do_locking = None
495
    self.wanted = None
496

    
497
  def _GetNames(self, lu, all_names, lock_level):
498
    """Helper function to determine names asked for in the query.
499

500
    """
501
    if self.do_locking:
502
      names = lu.glm.list_owned(lock_level)
503
    else:
504
      names = all_names
505

    
506
    if self.wanted == locking.ALL_SET:
507
      assert not self.names
508
      # caller didn't specify names, so ordering is not important
509
      return utils.NiceSort(names)
510

    
511
    # caller specified names and we must keep the same order
512
    assert self.names
513
    assert not self.do_locking or lu.glm.is_owned(lock_level)
514

    
515
    missing = set(self.wanted).difference(names)
516
    if missing:
517
      raise errors.OpExecError("Some items were removed before retrieving"
518
                               " their data: %s" % missing)
519

    
520
    # Return expanded names
521
    return self.wanted
522

    
523
  def ExpandNames(self, lu):
524
    """Expand names for this query.
525

526
    See L{LogicalUnit.ExpandNames}.
527

528
    """
529
    raise NotImplementedError()
530

    
531
  def DeclareLocks(self, lu, level):
532
    """Declare locks for this query.
533

534
    See L{LogicalUnit.DeclareLocks}.
535

536
    """
537
    raise NotImplementedError()
538

    
539
  def _GetQueryData(self, lu):
540
    """Collects all data for this query.
541

542
    @return: Query data object
543

544
    """
545
    raise NotImplementedError()
546

    
547
  def NewStyleQuery(self, lu):
548
    """Collect data and execute query.
549

550
    """
551
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
552
                                  sort_by_name=self.sort_by_name)
553

    
554
  def OldStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return self.query.OldStyleQuery(self._GetQueryData(lu),
559
                                    sort_by_name=self.sort_by_name)
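  # Illustrative sketch (not part of the original module): a concrete query
  # class sets FIELDS and implements the three abstract methods; the class
  # name and the elided pieces below are hypothetical.
  #
  #   class _ExampleQuery(_QueryBase):
  #     FIELDS = ...  # a field definition table from the query module
  #
  #     def ExpandNames(self, lu):
  #       lu.needed_locks = {}
  #       self.do_locking = self.use_locking
  #       self.wanted = self.names or locking.ALL_SET
  #
  #     def DeclareLocks(self, lu, level):
  #       pass
  #
  #     def _GetQueryData(self, lu):
  #       return ...  # data object consumed by query.GetQueryResponse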
560

    
561

    
562
def _GetWantedNodes(lu, nodes):
563
  """Returns list of checked and expanded node names.
564

565
  @type lu: L{LogicalUnit}
566
  @param lu: the logical unit on whose behalf we execute
567
  @type nodes: list
568
  @param nodes: list of node names or None for all nodes
569
  @rtype: list
570
  @return: the list of nodes, sorted
571
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
572

573
  """
574
  if nodes:
575
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
576

    
577
  return utils.NiceSort(lu.cfg.GetNodeList())
578

    
579

    
580
def _GetWantedInstances(lu, instances):
581
  """Returns list of checked and expanded instance names.
582

583
  @type lu: L{LogicalUnit}
584
  @param lu: the logical unit on whose behalf we execute
585
  @type instances: list
586
  @param instances: list of instance names or None for all instances
587
  @rtype: list
588
  @return: the list of instances, sorted
589
  @raise errors.OpPrereqError: if the instances parameter is wrong type
590
  @raise errors.OpPrereqError: if any of the passed instances is not found
591

592
  """
593
  if instances:
594
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
595
  else:
596
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
597
  return wanted
598

    
599

    
600
def _GetUpdatedParams(old_params, update_dict,
601
                      use_default=True, use_none=False):
602
  """Return the new version of a parameter dictionary.
603

604
  @type old_params: dict
605
  @param old_params: old parameters
606
  @type update_dict: dict
607
  @param update_dict: dict containing new parameter values, or
608
      constants.VALUE_DEFAULT to reset the parameter to its default
609
      value
610
  @type use_default: boolean
611
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
612
      values as 'to be deleted' values
613
  @type use_none: boolean
614
  @param use_none: whether to recognise C{None} values as 'to be
615
      deleted' values
616
  @rtype: dict
617
  @return: the new parameter dictionary
618

619
  """
620
  params_copy = copy.deepcopy(old_params)
621
  for key, val in update_dict.iteritems():
622
    if ((use_default and val == constants.VALUE_DEFAULT) or
623
        (use_none and val is None)):
624
      try:
625
        del params_copy[key]
626
      except KeyError:
627
        pass
628
    else:
629
      params_copy[key] = val
630
  return params_copy
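# Illustrative example (not part of the original module) of the merge
# semantics above; the parameter names and values are made up:
#
#   _GetUpdatedParams({"vcpus": 2, "memory": 512},
#                     {"memory": constants.VALUE_DEFAULT, "vcpus": 4})
#   --> {"vcpus": 4}    # "memory" is reset (removed), "vcpus" is overridden
#
#   _GetUpdatedParams({"vcpus": 2}, {"vcpus": None}, use_none=True)
#   --> {}              # None removes the key when use_none=True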
631

    
632

    
633
def _ReleaseLocks(lu, level, names=None, keep=None):
634
  """Releases locks owned by an LU.
635

636
  @type lu: L{LogicalUnit}
  @param lu: the logical unit which owns the locks
637
  @param level: Lock level
638
  @type names: list or None
639
  @param names: Names of locks to release
640
  @type keep: list or None
641
  @param keep: Names of locks to retain
642

643
  """
644
  assert not (keep is not None and names is not None), \
645
         "Only one of the 'names' and the 'keep' parameters can be given"
646

    
647
  if names is not None:
648
    should_release = names.__contains__
649
  elif keep:
650
    should_release = lambda name: name not in keep
651
  else:
652
    should_release = None
653

    
654
  if should_release:
655
    retain = []
656
    release = []
657

    
658
    # Determine which locks to release
659
    for name in lu.glm.list_owned(level):
660
      if should_release(name):
661
        release.append(name)
662
      else:
663
        retain.append(name)
664

    
665
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
666

    
667
    # Release just some locks
668
    lu.glm.release(level, names=release)
669

    
670
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
671
  else:
672
    # Release everything
673
    lu.glm.release(level)
674

    
675
    assert not lu.glm.is_owned(level), "No locks should be owned"
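# Illustrative sketch (not part of the original module): a typical caller keeps
# only the node locks it still needs, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[instance.primary_node] + list(instance.secondary_nodes))
#
# or drops a whole level once it is done with it:
#
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)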
676

    
677

    
678
def _RunPostHook(lu, node_name):
679
  """Runs the post-hook for an opcode on a single node.
680

681
  """
682
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
683
  try:
684
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
685
  except:
686
    # pylint: disable-msg=W0702
687
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
688

    
689

    
690
def _CheckOutputFields(static, dynamic, selected):
691
  """Checks whether all selected fields are valid.
692

693
  @type static: L{utils.FieldSet}
694
  @param static: static fields set
695
  @type dynamic: L{utils.FieldSet}
696
  @param dynamic: dynamic fields set
697

698
  """
699
  f = utils.FieldSet()
700
  f.Extend(static)
701
  f.Extend(dynamic)
702

    
703
  delta = f.NonMatching(selected)
704
  if delta:
705
    raise errors.OpPrereqError("Unknown output fields selected: %s"
706
                               % ",".join(delta), errors.ECODE_INVAL)
707

    
708

    
709
def _CheckGlobalHvParams(params):
710
  """Validates that given hypervisor params are not global ones.
711

712
  This will ensure that instances don't get customised versions of
713
  global params.
714

715
  """
716
  used_globals = constants.HVC_GLOBALS.intersection(params)
717
  if used_globals:
718
    msg = ("The following hypervisor parameters are global and cannot"
719
           " be customized at instance level, please modify them at"
720
           " cluster level: %s" % utils.CommaJoin(used_globals))
721
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
722

    
723

    
724
def _CheckNodeOnline(lu, node, msg=None):
725
  """Ensure that a given node is online.
726

727
  @param lu: the LU on behalf of which we make the check
728
  @param node: the node to check
729
  @param msg: if passed, should be a message to replace the default one
730
  @raise errors.OpPrereqError: if the node is offline
731

732
  """
733
  if msg is None:
734
    msg = "Can't use offline node"
735
  if lu.cfg.GetNodeInfo(node).offline:
736
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
737

    
738

    
739
def _CheckNodeNotDrained(lu, node):
740
  """Ensure that a given node is not drained.
741

742
  @param lu: the LU on behalf of which we make the check
743
  @param node: the node to check
744
  @raise errors.OpPrereqError: if the node is drained
745

746
  """
747
  if lu.cfg.GetNodeInfo(node).drained:
748
    raise errors.OpPrereqError("Can't use drained node %s" % node,
749
                               errors.ECODE_STATE)
750

    
751

    
752
def _CheckNodeVmCapable(lu, node):
753
  """Ensure that a given node is vm capable.
754

755
  @param lu: the LU on behalf of which we make the check
756
  @param node: the node to check
757
  @raise errors.OpPrereqError: if the node is not vm capable
758

759
  """
760
  if not lu.cfg.GetNodeInfo(node).vm_capable:
761
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
762
                               errors.ECODE_STATE)
763

    
764

    
765
def _CheckNodeHasOS(lu, node, os_name, force_variant):
766
  """Ensure that a node supports a given OS.
767

768
  @param lu: the LU on behalf of which we make the check
769
  @param node: the node to check
770
  @param os_name: the OS to query about
771
  @param force_variant: whether to ignore variant errors
772
  @raise errors.OpPrereqError: if the node is not supporting the OS
773

774
  """
775
  result = lu.rpc.call_os_get(node, os_name)
776
  result.Raise("OS '%s' not in supported OS list for node %s" %
777
               (os_name, node),
778
               prereq=True, ecode=errors.ECODE_INVAL)
779
  if not force_variant:
780
    _CheckOSVariant(result.payload, os_name)
781

    
782

    
783
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
784
  """Ensure that a node has the given secondary ip.
785

786
  @type lu: L{LogicalUnit}
787
  @param lu: the LU on behalf of which we make the check
788
  @type node: string
789
  @param node: the node to check
790
  @type secondary_ip: string
791
  @param secondary_ip: the ip to check
792
  @type prereq: boolean
793
  @param prereq: whether to throw a prerequisite or an execute error
794
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
795
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
796

797
  """
798
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
799
  result.Raise("Failure checking secondary ip on node %s" % node,
800
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
801
  if not result.payload:
802
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
803
           " please fix and re-run this command" % secondary_ip)
804
    if prereq:
805
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
806
    else:
807
      raise errors.OpExecError(msg)
808

    
809

    
810
def _GetClusterDomainSecret():
811
  """Reads the cluster domain secret.
812

813
  """
814
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
815
                               strict=True)
816

    
817

    
818
def _CheckInstanceDown(lu, instance, reason):
819
  """Ensure that an instance is not running."""
820
  if instance.admin_up:
821
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
822
                               (instance.name, reason), errors.ECODE_STATE)
823

    
824
  pnode = instance.primary_node
825
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
826
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
827
              prereq=True, ecode=errors.ECODE_ENVIRON)
828

    
829
  if instance.name in ins_l.payload:
830
    raise errors.OpPrereqError("Instance %s is running, %s" %
831
                               (instance.name, reason), errors.ECODE_STATE)
832

    
833

    
834
def _ExpandItemName(fn, name, kind):
835
  """Expand an item name.
836

837
  @param fn: the function to use for expansion
838
  @param name: requested item name
839
  @param kind: text description ('Node' or 'Instance')
840
  @return: the resolved (full) name
841
  @raise errors.OpPrereqError: if the item is not found
842

843
  """
844
  full_name = fn(name)
845
  if full_name is None:
846
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
847
                               errors.ECODE_NOENT)
848
  return full_name
849

    
850

    
851
def _ExpandNodeName(cfg, name):
852
  """Wrapper over L{_ExpandItemName} for nodes."""
853
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
854

    
855

    
856
def _ExpandInstanceName(cfg, name):
857
  """Wrapper over L{_ExpandItemName} for instance."""
858
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
859

    
860

    
861
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
862
                          memory, vcpus, nics, disk_template, disks,
863
                          bep, hvp, hypervisor_name):
864
  """Builds instance related env variables for hooks
865

866
  This builds the hook environment from individual variables.
867

868
  @type name: string
869
  @param name: the name of the instance
870
  @type primary_node: string
871
  @param primary_node: the name of the instance's primary node
872
  @type secondary_nodes: list
873
  @param secondary_nodes: list of secondary nodes as strings
874
  @type os_type: string
875
  @param os_type: the name of the instance's OS
876
  @type status: boolean
877
  @param status: the should_run status of the instance
878
  @type memory: string
879
  @param memory: the memory size of the instance
880
  @type vcpus: string
881
  @param vcpus: the count of VCPUs the instance has
882
  @type nics: list
883
  @param nics: list of tuples (ip, mac, mode, link) representing
884
      the NICs the instance has
885
  @type disk_template: string
886
  @param disk_template: the disk template of the instance
887
  @type disks: list
888
  @param disks: the list of (size, mode) pairs
889
  @type bep: dict
890
  @param bep: the backend parameters for the instance
891
  @type hvp: dict
892
  @param hvp: the hypervisor parameters for the instance
893
  @type hypervisor_name: string
894
  @param hypervisor_name: the hypervisor for the instance
895
  @rtype: dict
896
  @return: the hook environment for this instance
897

898
  """
899
  if status:
900
    str_status = "up"
901
  else:
902
    str_status = "down"
903
  env = {
904
    "OP_TARGET": name,
905
    "INSTANCE_NAME": name,
906
    "INSTANCE_PRIMARY": primary_node,
907
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
908
    "INSTANCE_OS_TYPE": os_type,
909
    "INSTANCE_STATUS": str_status,
910
    "INSTANCE_MEMORY": memory,
911
    "INSTANCE_VCPUS": vcpus,
912
    "INSTANCE_DISK_TEMPLATE": disk_template,
913
    "INSTANCE_HYPERVISOR": hypervisor_name,
914
  }
915

    
916
  if nics:
917
    nic_count = len(nics)
918
    for idx, (ip, mac, mode, link) in enumerate(nics):
919
      if ip is None:
920
        ip = ""
921
      env["INSTANCE_NIC%d_IP" % idx] = ip
922
      env["INSTANCE_NIC%d_MAC" % idx] = mac
923
      env["INSTANCE_NIC%d_MODE" % idx] = mode
924
      env["INSTANCE_NIC%d_LINK" % idx] = link
925
      if mode == constants.NIC_MODE_BRIDGED:
926
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
927
  else:
928
    nic_count = 0
929

    
930
  env["INSTANCE_NIC_COUNT"] = nic_count
931

    
932
  if disks:
933
    disk_count = len(disks)
934
    for idx, (size, mode) in enumerate(disks):
935
      env["INSTANCE_DISK%d_SIZE" % idx] = size
936
      env["INSTANCE_DISK%d_MODE" % idx] = mode
937
  else:
938
    disk_count = 0
939

    
940
  env["INSTANCE_DISK_COUNT"] = disk_count
941

    
942
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
943
    for key, value in source.items():
944
      env["INSTANCE_%s_%s" % (kind, key)] = value
945

    
946
  return env
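# Illustrative example (not part of the original module): for an instance with
# one bridged NIC and one disk, the returned dict contains entries along these
# lines (all values are made up):
#
#   {
#     "OP_TARGET": "instance1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MODE": constants.NIC_MODE_BRIDGED,
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     ...
#   }
#
# The hooks runner later prefixes each key with "GANETI_".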
947

    
948

    
949
def _NICListToTuple(lu, nics):
950
  """Build a list of nic information tuples.
951

952
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
953
  value in LUInstanceQueryData.
954

955
  @type lu:  L{LogicalUnit}
956
  @param lu: the logical unit on whose behalf we execute
957
  @type nics: list of L{objects.NIC}
958
  @param nics: list of nics to convert to hooks tuples
959

960
  """
961
  hooks_nics = []
962
  cluster = lu.cfg.GetClusterInfo()
963
  for nic in nics:
964
    ip = nic.ip
965
    mac = nic.mac
966
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
967
    mode = filled_params[constants.NIC_MODE]
968
    link = filled_params[constants.NIC_LINK]
969
    hooks_nics.append((ip, mac, mode, link))
970
  return hooks_nics
971

    
972

    
973
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
974
  """Builds instance related env variables for hooks from an object.
975

976
  @type lu: L{LogicalUnit}
977
  @param lu: the logical unit on whose behalf we execute
978
  @type instance: L{objects.Instance}
979
  @param instance: the instance for which we should build the
980
      environment
981
  @type override: dict
982
  @param override: dictionary with key/values that will override
983
      our values
984
  @rtype: dict
985
  @return: the hook environment dictionary
986

987
  """
988
  cluster = lu.cfg.GetClusterInfo()
989
  bep = cluster.FillBE(instance)
990
  hvp = cluster.FillHV(instance)
991
  args = {
992
    'name': instance.name,
993
    'primary_node': instance.primary_node,
994
    'secondary_nodes': instance.secondary_nodes,
995
    'os_type': instance.os,
996
    'status': instance.admin_up,
997
    'memory': bep[constants.BE_MEMORY],
998
    'vcpus': bep[constants.BE_VCPUS],
999
    'nics': _NICListToTuple(lu, instance.nics),
1000
    'disk_template': instance.disk_template,
1001
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1002
    'bep': bep,
1003
    'hvp': hvp,
1004
    'hypervisor_name': instance.hypervisor,
1005
  }
1006
  if override:
1007
    args.update(override)
1008
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1009

    
1010

    
1011
def _AdjustCandidatePool(lu, exceptions):
1012
  """Adjust the candidate pool after node operations.
1013

1014
  """
1015
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1016
  if mod_list:
1017
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1018
               utils.CommaJoin(node.name for node in mod_list))
1019
    for name in mod_list:
1020
      lu.context.ReaddNode(name)
1021
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1022
  if mc_now > mc_max:
1023
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1024
               (mc_now, mc_max))
1025

    
1026

    
1027
def _DecideSelfPromotion(lu, exceptions=None):
1028
  """Decide whether I should promote myself as a master candidate.
1029

1030
  """
1031
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1032
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1033
  # the new node will increase mc_max by one, so:
1034
  mc_should = min(mc_should + 1, cp_size)
1035
  return mc_now < mc_should
1036

    
1037

    
1038
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1039
  """Check that the brigdes needed by a list of nics exist.
1040

1041
  """
1042
  cluster = lu.cfg.GetClusterInfo()
1043
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1044
  brlist = [params[constants.NIC_LINK] for params in paramslist
1045
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1046
  if brlist:
1047
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1048
    result.Raise("Error checking bridges on destination node '%s'" %
1049
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1050

    
1051

    
1052
def _CheckInstanceBridgesExist(lu, instance, node=None):
1053
  """Check that the brigdes needed by an instance exist.
1054

1055
  """
1056
  if node is None:
1057
    node = instance.primary_node
1058
  _CheckNicsBridgesExist(lu, instance.nics, node)
1059

    
1060

    
1061
def _CheckOSVariant(os_obj, name):
1062
  """Check whether an OS name conforms to the os variants specification.
1063

1064
  @type os_obj: L{objects.OS}
1065
  @param os_obj: OS object to check
1066
  @type name: string
1067
  @param name: OS name passed by the user, to check for validity
1068

1069
  """
1070
  if not os_obj.supported_variants:
1071
    return
1072
  variant = objects.OS.GetVariant(name)
1073
  if not variant:
1074
    raise errors.OpPrereqError("OS name must include a variant",
1075
                               errors.ECODE_INVAL)
1076

    
1077
  if variant not in os_obj.supported_variants:
1078
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1079

    
1080

    
1081
def _GetNodeInstancesInner(cfg, fn):
1082
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1083

    
1084

    
1085
def _GetNodeInstances(cfg, node_name):
1086
  """Returns a list of all primary and secondary instances on a node.
1087

1088
  """
1089

    
1090
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1091

    
1092

    
1093
def _GetNodePrimaryInstances(cfg, node_name):
1094
  """Returns primary instances on a node.
1095

1096
  """
1097
  return _GetNodeInstancesInner(cfg,
1098
                                lambda inst: node_name == inst.primary_node)
1099

    
1100

    
1101
def _GetNodeSecondaryInstances(cfg, node_name):
1102
  """Returns secondary instances on a node.
1103

1104
  """
1105
  return _GetNodeInstancesInner(cfg,
1106
                                lambda inst: node_name in inst.secondary_nodes)
1107

    
1108

    
1109
def _GetStorageTypeArgs(cfg, storage_type):
1110
  """Returns the arguments for a storage type.
1111

1112
  """
1113
  # Special case for file storage
1114
  if storage_type == constants.ST_FILE:
1115
    # storage.FileStorage wants a list of storage directories
1116
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1117

    
1118
  return []
1119

    
1120

    
1121
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1122
  faulty = []
1123

    
1124
  for dev in instance.disks:
1125
    cfg.SetDiskID(dev, node_name)
1126

    
1127
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1128
  result.Raise("Failed to get disk status from node %s" % node_name,
1129
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1130

    
1131
  for idx, bdev_status in enumerate(result.payload):
1132
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1133
      faulty.append(idx)
1134

    
1135
  return faulty
1136

    
1137

    
1138
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1139
  """Check the sanity of iallocator and node arguments and use the
1140
  cluster-wide iallocator if appropriate.
1141

1142
  Check that at most one of (iallocator, node) is specified. If none is
1143
  specified, then the LU's opcode's iallocator slot is filled with the
1144
  cluster-wide default iallocator.
1145

1146
  @type iallocator_slot: string
1147
  @param iallocator_slot: the name of the opcode iallocator slot
1148
  @type node_slot: string
1149
  @param node_slot: the name of the opcode target node slot
1150

1151
  """
1152
  node = getattr(lu.op, node_slot, None)
1153
  iallocator = getattr(lu.op, iallocator_slot, None)
1154

    
1155
  if node is not None and iallocator is not None:
1156
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1157
                               errors.ECODE_INVAL)
1158
  elif node is None and iallocator is None:
1159
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1160
    if default_iallocator:
1161
      setattr(lu.op, iallocator_slot, default_iallocator)
1162
    else:
1163
      raise errors.OpPrereqError("No iallocator or node given and no"
1164
                                 " cluster-wide default iallocator found."
1165
                                 " Please specify either an iallocator or a"
1166
                                 " node, or set a cluster-wide default"
1167
                                 " iallocator.")
1168

    
1169

    
1170
class LUClusterPostInit(LogicalUnit):
1171
  """Logical unit for running hooks after cluster initialization.
1172

1173
  """
1174
  HPATH = "cluster-init"
1175
  HTYPE = constants.HTYPE_CLUSTER
1176

    
1177
  def BuildHooksEnv(self):
1178
    """Build hooks env.
1179

1180
    """
1181
    return {
1182
      "OP_TARGET": self.cfg.GetClusterName(),
1183
      }
1184

    
1185
  def BuildHooksNodes(self):
1186
    """Build hooks nodes.
1187

1188
    """
1189
    return ([], [self.cfg.GetMasterNode()])
1190

    
1191
  def Exec(self, feedback_fn):
1192
    """Nothing to do.
1193

1194
    """
1195
    return True
1196

    
1197

    
1198
class LUClusterDestroy(LogicalUnit):
1199
  """Logical unit for destroying the cluster.
1200

1201
  """
1202
  HPATH = "cluster-destroy"
1203
  HTYPE = constants.HTYPE_CLUSTER
1204

    
1205
  def BuildHooksEnv(self):
1206
    """Build hooks env.
1207

1208
    """
1209
    return {
1210
      "OP_TARGET": self.cfg.GetClusterName(),
1211
      }
1212

    
1213
  def BuildHooksNodes(self):
1214
    """Build hooks nodes.
1215

1216
    """
1217
    return ([], [])
1218

    
1219
  def CheckPrereq(self):
1220
    """Check prerequisites.
1221

1222
    This checks whether the cluster is empty.
1223

1224
    Any errors are signaled by raising errors.OpPrereqError.
1225

1226
    """
1227
    master = self.cfg.GetMasterNode()
1228

    
1229
    nodelist = self.cfg.GetNodeList()
1230
    if len(nodelist) != 1 or nodelist[0] != master:
1231
      raise errors.OpPrereqError("There are still %d node(s) in"
1232
                                 " this cluster." % (len(nodelist) - 1),
1233
                                 errors.ECODE_INVAL)
1234
    instancelist = self.cfg.GetInstanceList()
1235
    if instancelist:
1236
      raise errors.OpPrereqError("There are still %d instance(s) in"
1237
                                 " this cluster." % len(instancelist),
1238
                                 errors.ECODE_INVAL)
1239

    
1240
  def Exec(self, feedback_fn):
1241
    """Destroys the cluster.
1242

1243
    """
1244
    master = self.cfg.GetMasterNode()
1245

    
1246
    # Run post hooks on master node before it's removed
1247
    _RunPostHook(self, master)
1248

    
1249
    result = self.rpc.call_node_stop_master(master, False)
1250
    result.Raise("Could not disable the master role")
1251

    
1252
    return master
1253

    
1254

    
1255
def _VerifyCertificate(filename):
1256
  """Verifies a certificate for LUClusterVerify.
1257

1258
  @type filename: string
1259
  @param filename: Path to PEM file
1260

1261
  """
1262
  try:
1263
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1264
                                           utils.ReadFile(filename))
1265
  except Exception, err: # pylint: disable-msg=W0703
1266
    return (LUClusterVerify.ETYPE_ERROR,
1267
            "Failed to load X509 certificate %s: %s" % (filename, err))
1268

    
1269
  (errcode, msg) = \
1270
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1271
                                constants.SSL_CERT_EXPIRATION_ERROR)
1272

    
1273
  if msg:
1274
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1275
  else:
1276
    fnamemsg = None
1277

    
1278
  if errcode is None:
1279
    return (None, fnamemsg)
1280
  elif errcode == utils.CERT_WARNING:
1281
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1282
  elif errcode == utils.CERT_ERROR:
1283
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1284

    
1285
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1286

    
1287

    
1288
class LUClusterVerify(LogicalUnit):
1289
  """Verifies the cluster status.
1290

1291
  """
1292
  HPATH = "cluster-verify"
1293
  HTYPE = constants.HTYPE_CLUSTER
1294
  REQ_BGL = False
1295

    
1296
  TCLUSTER = "cluster"
1297
  TNODE = "node"
1298
  TINSTANCE = "instance"
1299

    
1300
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1301
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1302
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1303
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1304
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1305
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1306
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1307
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1308
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1309
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1310
  ENODEDRBD = (TNODE, "ENODEDRBD")
1311
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1312
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1313
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1314
  ENODEHV = (TNODE, "ENODEHV")
1315
  ENODELVM = (TNODE, "ENODELVM")
1316
  ENODEN1 = (TNODE, "ENODEN1")
1317
  ENODENET = (TNODE, "ENODENET")
1318
  ENODEOS = (TNODE, "ENODEOS")
1319
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1320
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1321
  ENODERPC = (TNODE, "ENODERPC")
1322
  ENODESSH = (TNODE, "ENODESSH")
1323
  ENODEVERSION = (TNODE, "ENODEVERSION")
1324
  ENODESETUP = (TNODE, "ENODESETUP")
1325
  ENODETIME = (TNODE, "ENODETIME")
1326
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1327

    
1328
  ETYPE_FIELD = "code"
1329
  ETYPE_ERROR = "ERROR"
1330
  ETYPE_WARNING = "WARNING"
1331

    
1332
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1333

    
1334
  class NodeImage(object):
1335
    """A class representing the logical and physical status of a node.
1336

1337
    @type name: string
1338
    @ivar name: the node name to which this object refers
1339
    @ivar volumes: a structure as returned from
1340
        L{ganeti.backend.GetVolumeList} (runtime)
1341
    @ivar instances: a list of running instances (runtime)
1342
    @ivar pinst: list of configured primary instances (config)
1343
    @ivar sinst: list of configured secondary instances (config)
1344
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1345
        instances for which this node is secondary (config)
1346
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1347
    @ivar dfree: free disk, as reported by the node (runtime)
1348
    @ivar offline: the offline status (config)
1349
    @type rpc_fail: boolean
1350
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1351
        not whether the individual keys were correct) (runtime)
1352
    @type lvm_fail: boolean
1353
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1354
    @type hyp_fail: boolean
1355
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1356
    @type ghost: boolean
1357
    @ivar ghost: whether this is a known node or not (config)
1358
    @type os_fail: boolean
1359
    @ivar os_fail: whether the RPC call didn't return valid OS data
1360
    @type oslist: list
1361
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1362
    @type vm_capable: boolean
1363
    @ivar vm_capable: whether the node can host instances
1364

1365
    """
1366
    def __init__(self, offline=False, name=None, vm_capable=True):
1367
      self.name = name
1368
      self.volumes = {}
1369
      self.instances = []
1370
      self.pinst = []
1371
      self.sinst = []
1372
      self.sbp = {}
1373
      self.mfree = 0
1374
      self.dfree = 0
1375
      self.offline = offline
1376
      self.vm_capable = vm_capable
1377
      self.rpc_fail = False
1378
      self.lvm_fail = False
1379
      self.hyp_fail = False
1380
      self.ghost = False
1381
      self.os_fail = False
1382
      self.oslist = {}
1383

    
1384
  def ExpandNames(self):
1385
    self.needed_locks = {
1386
      locking.LEVEL_NODE: locking.ALL_SET,
1387
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1388
    }
1389
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1390

    
1391
  def _Error(self, ecode, item, msg, *args, **kwargs):
1392
    """Format an error message.
1393

1394
    Based on the opcode's error_codes parameter, either format a
1395
    parseable error code, or a simpler error string.
1396

1397
    This must be called only from Exec and functions called from Exec.
1398

1399
    """
1400
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1401
    itype, etxt = ecode
1402
    # first complete the msg
1403
    if args:
1404
      msg = msg % args
1405
    # then format the whole message
1406
    if self.op.error_codes:
1407
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1408
    else:
1409
      if item:
1410
        item = " " + item
1411
      else:
1412
        item = ""
1413
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1414
    # and finally report it via the feedback_fn
1415
    self._feedback_fn("  - %s" % msg)
1416

    
1417
  def _ErrorIf(self, cond, *args, **kwargs):
1418
    """Log an error message if the passed condition is True.
1419

1420
    """
1421
    cond = bool(cond) or self.op.debug_simulate_errors
1422
    if cond:
1423
      self._Error(*args, **kwargs)
1424
    # do not mark the operation as failed for cases that are only warnings
1425
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1426
      self.bad = self.bad or cond
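  # Illustrative example (not part of the original module): callers use this
  # as, e.g.,
  #
  #   _ErrorIf(test, self.ENODERPC, node, "unable to verify node: %s", msg)
  #
  # which, with the opcode's error_codes option enabled, is reported via
  # feedback_fn as
  # "ERROR:ENODERPC:node:node1.example.com:unable to verify node: ..."
  # (the node name is a made-up example).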
1427

    
1428
  def _VerifyNode(self, ninfo, nresult):
1429
    """Perform some basic validation on data returned from a node.
1430

1431
      - check the result data structure is well formed and has all the
1432
        mandatory fields
1433
      - check ganeti version
1434

1435
    @type ninfo: L{objects.Node}
1436
    @param ninfo: the node to check
1437
    @param nresult: the results from the node
1438
    @rtype: boolean
1439
    @return: whether overall this call was successful (and we can expect
1440
         reasonable values in the response)
1441

1442
    """
1443
    node = ninfo.name
1444
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1445

    
1446
    # main result, nresult should be a non-empty dict
1447
    test = not nresult or not isinstance(nresult, dict)
1448
    _ErrorIf(test, self.ENODERPC, node,
1449
                  "unable to verify node: no data returned")
1450
    if test:
1451
      return False
1452

    
1453
    # compares ganeti version
1454
    local_version = constants.PROTOCOL_VERSION
1455
    remote_version = nresult.get("version", None)
1456
    test = not (remote_version and
1457
                isinstance(remote_version, (list, tuple)) and
1458
                len(remote_version) == 2)
1459
    _ErrorIf(test, self.ENODERPC, node,
1460
             "connection to node returned invalid data")
1461
    if test:
1462
      return False
1463

    
1464
    test = local_version != remote_version[0]
1465
    _ErrorIf(test, self.ENODEVERSION, node,
1466
             "incompatible protocol versions: master %s,"
1467
             " node %s", local_version, remote_version[0])
1468
    if test:
1469
      return False
1470

    
1471
    # node seems compatible, we can actually try to look into its results
1472

    
1473
    # full package version
1474
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1475
                  self.ENODEVERSION, node,
1476
                  "software version mismatch: master %s, node %s",
1477
                  constants.RELEASE_VERSION, remote_version[1],
1478
                  code=self.ETYPE_WARNING)
1479

    
1480
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1481
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1482
      for hv_name, hv_result in hyp_result.iteritems():
1483
        test = hv_result is not None
1484
        _ErrorIf(test, self.ENODEHV, node,
1485
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1486

    
1487
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1488
    if ninfo.vm_capable and isinstance(hvp_result, list):
1489
      for item, hv_name, hv_result in hvp_result:
1490
        _ErrorIf(True, self.ENODEHV, node,
1491
                 "hypervisor %s parameter verify failure (source %s): %s",
1492
                 hv_name, item, hv_result)
1493

    
1494
    test = nresult.get(constants.NV_NODESETUP,
1495
                       ["Missing NODESETUP results"])
1496
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1497
             "; ".join(test))
1498

    
1499
    return True
1500

    
1501
  def _VerifyNodeTime(self, ninfo, nresult,
1502
                      nvinfo_starttime, nvinfo_endtime):
1503
    """Check the node time.
1504

1505
    @type ninfo: L{objects.Node}
1506
    @param ninfo: the node to check
1507
    @param nresult: the remote results for the node
1508
    @param nvinfo_starttime: the start time of the RPC call
1509
    @param nvinfo_endtime: the end time of the RPC call
1510

1511
    """
1512
    node = ninfo.name
1513
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1514

    
1515
    ntime = nresult.get(constants.NV_TIME, None)
1516
    try:
1517
      ntime_merged = utils.MergeTime(ntime)
1518
    except (ValueError, TypeError):
1519
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1520
      return
1521

    
1522
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1523
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1524
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1525
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1526
    else:
1527
      ntime_diff = None
1528

    
1529
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1530
             "Node time diverges by at least %s from master node time",
1531
             ntime_diff)
1532

    
1533
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1534
    """Check the node time.
1535

1536
    @type ninfo: L{objects.Node}
1537
    @param ninfo: the node to check
1538
    @param nresult: the remote results for the node
1539
    @param vg_name: the configured VG name
1540

1541
    """
1542
    if vg_name is None:
1543
      return
1544

    
1545
    node = ninfo.name
1546
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1547

    
1548
    # checks vg existence and size > 20G
1549
    vglist = nresult.get(constants.NV_VGLIST, None)
1550
    test = not vglist
1551
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1552
    if not test:
1553
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1554
                                            constants.MIN_VG_SIZE)
1555
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1556

    
1557
    # check pv names
1558
    pvlist = nresult.get(constants.NV_PVLIST, None)
1559
    test = pvlist is None
1560
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1561
    if not test:
1562
      # check that ':' is not present in PV names, since it's a
1563
      # special character for lvcreate (denotes the range of PEs to
1564
      # use on the PV)
1565
      for _, pvname, owner_vg in pvlist:
1566
        test = ":" in pvname
1567
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1568
                 " '%s' of VG '%s'", pvname, owner_vg)
1569

    
1570
  def _VerifyNodeNetwork(self, ninfo, nresult):
1571
    """Check the node time.
1572

1573
    @type ninfo: L{objects.Node}
1574
    @param ninfo: the node to check
1575
    @param nresult: the remote results for the node
1576

1577
    """
1578
    node = ninfo.name
1579
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1580

    
1581
    test = constants.NV_NODELIST not in nresult
1582
    _ErrorIf(test, self.ENODESSH, node,
1583
             "node hasn't returned node ssh connectivity data")
1584
    if not test:
1585
      if nresult[constants.NV_NODELIST]:
1586
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1587
          _ErrorIf(True, self.ENODESSH, node,
1588
                   "ssh communication with node '%s': %s", a_node, a_msg)
1589

    
1590
    test = constants.NV_NODENETTEST not in nresult
1591
    _ErrorIf(test, self.ENODENET, node,
1592
             "node hasn't returned node tcp connectivity data")
1593
    if not test:
1594
      if nresult[constants.NV_NODENETTEST]:
1595
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1596
        for anode in nlist:
1597
          _ErrorIf(True, self.ENODENET, node,
1598
                   "tcp communication with node '%s': %s",
1599
                   anode, nresult[constants.NV_NODENETTEST][anode])
1600

    
1601
    test = constants.NV_MASTERIP not in nresult
1602
    _ErrorIf(test, self.ENODENET, node,
1603
             "node hasn't returned node master IP reachability data")
1604
    if not test:
1605
      if not nresult[constants.NV_MASTERIP]:
1606
        if node == self.master_node:
1607
          msg = "the master node cannot reach the master IP (not configured?)"
1608
        else:
1609
          msg = "cannot reach the master IP"
1610
        _ErrorIf(True, self.ENODENET, node, msg)
1611

    
1612
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1613
                      diskstatus):
1614
    """Verify an instance.
1615

1616
    This function checks to see if the required block devices are
1617
    available on the instance's nodes.
1618

1619
    """
1620
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1621
    node_current = instanceconfig.primary_node
1622

    
1623
    node_vol_should = {}
1624
    instanceconfig.MapLVsByNode(node_vol_should)
1625

    
1626
    for node in node_vol_should:
1627
      n_img = node_image[node]
1628
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1629
        # ignore missing volumes on offline or broken nodes
1630
        continue
1631
      for volume in node_vol_should[node]:
1632
        test = volume not in n_img.volumes
1633
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1634
                 "volume %s missing on node %s", volume, node)
1635

    
1636
    if instanceconfig.admin_up:
1637
      pri_img = node_image[node_current]
1638
      test = instance not in pri_img.instances and not pri_img.offline
1639
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1640
               "instance not running on its primary node %s",
1641
               node_current)
1642

    
1643
    for node, n_img in node_image.items():
1644
      if node != node_current:
1645
        test = instance in n_img.instances
1646
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1647
                 "instance should not run on node %s", node)
1648

    
1649
    diskdata = [(nname, success, status, idx)
1650
                for (nname, disks) in diskstatus.items()
1651
                for idx, (success, status) in enumerate(disks)]
1652

    
1653
    for nname, success, bdev_status, idx in diskdata:
1654
      # the 'ghost node' construction in Exec() ensures that we have a
1655
      # node here
1656
      snode = node_image[nname]
1657
      bad_snode = snode.ghost or snode.offline
1658
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1659
               self.EINSTANCEFAULTYDISK, instance,
1660
               "couldn't retrieve status for disk/%s on %s: %s",
1661
               idx, nname, bdev_status)
1662
      _ErrorIf((instanceconfig.admin_up and success and
1663
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1664
               self.EINSTANCEFAULTYDISK, instance,
1665
               "disk/%s on %s is faulty", idx, nname)
1666

    
1667
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1668
    """Verify if there are any unknown volumes in the cluster.
1669

1670
    The .os, .swap and backup volumes are ignored. All other volumes are
1671
    reported as unknown.
1672

1673
    @type reserved: L{ganeti.utils.FieldSet}
1674
    @param reserved: a FieldSet of reserved volume names
1675

1676
    """
1677
    for node, n_img in node_image.items():
1678
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1679
        # skip non-healthy nodes
1680
        continue
1681
      for volume in n_img.volumes:
1682
        test = ((node not in node_vol_should or
1683
                volume not in node_vol_should[node]) and
1684
                not reserved.Matches(volume))
1685
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1686
                      "volume %s is unknown", volume)
1687

    
1688
  def _VerifyOrphanInstances(self, instancelist, node_image):
1689
    """Verify the list of running instances.
1690

1691
    This checks what instances are running but unknown to the cluster.
1692

1693
    """
1694
    for node, n_img in node_image.items():
1695
      for o_inst in n_img.instances:
1696
        test = o_inst not in instancelist
1697
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1698
                      "instance %s on node %s should not exist", o_inst, node)
1699

    
1700
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1701
    """Verify N+1 Memory Resilience.
1702

1703
    Check that if one single node dies we can still start all the
1704
    instances it was primary for.
1705

1706
    """
1707
    cluster_info = self.cfg.GetClusterInfo()
1708
    for node, n_img in node_image.items():
1709
      # This code checks that every node which is now listed as
1710
      # secondary has enough memory to host all instances it is
1711
      # supposed to host, should a single other node in the cluster fail.
1712
      # FIXME: not ready for failover to an arbitrary node
1713
      # FIXME: does not support file-backed instances
1714
      # WARNING: we currently take into account down instances as well
1715
      # as up ones, considering that even if they're down someone
1716
      # might want to start them even in the event of a node failure.
1717
      if n_img.offline:
1718
        # we're skipping offline nodes from the N+1 warning, since
1719
        # most likely we don't have good memory information from them;
1720
        # we already list instances living on such nodes, and that's
1721
        # enough warning
1722
        continue
1723
      for prinode, instances in n_img.sbp.items():
1724
        needed_mem = 0
1725
        for instance in instances:
1726
          bep = cluster_info.FillBE(instance_cfg[instance])
1727
          if bep[constants.BE_AUTO_BALANCE]:
1728
            needed_mem += bep[constants.BE_MEMORY]
1729
        test = n_img.mfree < needed_mem
1730
        self._ErrorIf(test, self.ENODEN1, node,
1731
                      "not enough memory to accomodate instance failovers"
1732
                      " should node %s fail (%dMiB needed, %dMiB available)",
1733
                      prinode, needed_mem, n_img.mfree)
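  # Illustrative sketch (assumption): the N+1 test above amounts to summing
  # the memory of auto-balanced instances per prospective failed primary and
  # comparing it against the secondary's free memory.
  #
  #   def n_plus_one_ok(mfree, instances, be_params):
  #     """be_params maps instance name -> (auto_balance, memory_mib)."""
  #     needed = 0
  #     for name in instances:
  #       auto_balance, mem = be_params[name]
  #       if auto_balance:
  #         needed += mem
  #     return mfree >= needed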
1734

    
1735
  @classmethod
1736
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1737
                   (files_all, files_all_opt, files_mc, files_vm)):
1738
    """Verifies file checksums collected from all nodes.
1739

1740
    @param errorif: Callback for reporting errors
1741
    @param nodeinfo: List of L{objects.Node} objects
1742
    @param master_node: Name of master node
1743
    @param all_nvinfo: RPC results
1744

1745
    """
1746
    node_names = frozenset(node.name for node in nodeinfo)
1747

    
1748
    assert master_node in node_names
1749
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1750
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1751
           "Found file listed in more than one file list"
1752

    
1753
    # Define functions determining which nodes to consider for a file
1754
    file2nodefn = dict([(filename, fn)
1755
      for (files, fn) in [(files_all, None),
1756
                          (files_all_opt, None),
1757
                          (files_mc, lambda node: (node.master_candidate or
1758
                                                   node.name == master_node)),
1759
                          (files_vm, lambda node: node.vm_capable)]
1760
      for filename in files])
1761

    
1762
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1763

    
1764
    for node in nodeinfo:
1765
      nresult = all_nvinfo[node.name]
1766

    
1767
      if nresult.fail_msg or not nresult.payload:
1768
        node_files = None
1769
      else:
1770
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
1771

    
1772
      test = not (node_files and isinstance(node_files, dict))
1773
      errorif(test, cls.ENODEFILECHECK, node.name,
1774
              "Node did not return file checksum data")
1775
      if test:
1776
        continue
1777

    
1778
      for (filename, checksum) in node_files.items():
1779
        # Check if the file should be considered for a node
1780
        fn = file2nodefn[filename]
1781
        if fn is None or fn(node):
1782
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
1783

    
1784
    for (filename, checksums) in fileinfo.items():
1785
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1786

    
1787
      # Nodes having the file
1788
      with_file = frozenset(node_name
1789
                            for nodes in fileinfo[filename].values()
1790
                            for node_name in nodes)
1791

    
1792
      # Nodes missing file
1793
      missing_file = node_names - with_file
1794

    
1795
      if filename in files_all_opt:
1796
        # All or no nodes
1797
        errorif(missing_file and missing_file != node_names,
1798
                cls.ECLUSTERFILECHECK, None,
1799
                "File %s is optional, but it must exist on all or no nodes (not"
1800
                " found on %s)",
1801
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1802
      else:
1803
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1804
                "File %s is missing from node(s) %s", filename,
1805
                utils.CommaJoin(utils.NiceSort(missing_file)))
1806

    
1807
      # See if there are multiple versions of the file
1808
      test = len(checksums) > 1
1809
      if test:
1810
        variants = ["variant %s on %s" %
1811
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1812
                    for (idx, (checksum, nodes)) in
1813
                      enumerate(sorted(checksums.items()))]
1814
      else:
1815
        variants = []
1816

    
1817
      errorif(test, cls.ECLUSTERFILECHECK, None,
1818
              "File %s found with %s different checksums (%s)",
1819
              filename, len(checksums), "; ".join(variants))
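  # Illustrative sketch (not part of the module): detecting divergent file
  # versions as above is a matter of grouping nodes by checksum and flagging
  # files with more than one group.
  #
  #   def checksum_variants(fileinfo):
  #     """fileinfo: filename -> {checksum: set of node names}."""
  #     return dict((fname, cksums) for (fname, cksums) in fileinfo.items()
  #                 if len(cksums) > 1)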
1820

    
1821
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1822
                      drbd_map):
1823
    """Verifies and the node DRBD status.
1824

1825
    @type ninfo: L{objects.Node}
1826
    @param ninfo: the node to check
1827
    @param nresult: the remote results for the node
1828
    @param instanceinfo: the dict of instances
1829
    @param drbd_helper: the configured DRBD usermode helper
1830
    @param drbd_map: the DRBD map as returned by
1831
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1832

1833
    """
1834
    node = ninfo.name
1835
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1836

    
1837
    if drbd_helper:
1838
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1839
      test = (helper_result is None)
1840
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1841
               "no drbd usermode helper returned")
1842
      if helper_result:
1843
        status, payload = helper_result
1844
        test = not status
1845
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1846
                 "drbd usermode helper check unsuccessful: %s", payload)
1847
        test = status and (payload != drbd_helper)
1848
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1849
                 "wrong drbd usermode helper: %s", payload)
1850

    
1851
    # compute the DRBD minors
1852
    node_drbd = {}
1853
    for minor, instance in drbd_map[node].items():
1854
      test = instance not in instanceinfo
1855
      _ErrorIf(test, self.ECLUSTERCFG, None,
1856
               "ghost instance '%s' in temporary DRBD map", instance)
1857
      # ghost instance should not be running, but otherwise we
1858
      # don't give double warnings (both ghost instance and
1859
      # unallocated minor in use)
1860
      if test:
1861
        node_drbd[minor] = (instance, False)
1862
      else:
1863
        instance = instanceinfo[instance]
1864
        node_drbd[minor] = (instance.name, instance.admin_up)
1865

    
1866
    # and now check them
1867
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1868
    test = not isinstance(used_minors, (tuple, list))
1869
    _ErrorIf(test, self.ENODEDRBD, node,
1870
             "cannot parse drbd status file: %s", str(used_minors))
1871
    if test:
1872
      # we cannot check drbd status
1873
      return
1874

    
1875
    for minor, (iname, must_exist) in node_drbd.items():
1876
      test = minor not in used_minors and must_exist
1877
      _ErrorIf(test, self.ENODEDRBD, node,
1878
               "drbd minor %d of instance %s is not active", minor, iname)
1879
    for minor in used_minors:
1880
      test = minor not in node_drbd
1881
      _ErrorIf(test, self.ENODEDRBD, node,
1882
               "unallocated drbd minor %d is in use", minor)
1883

    
1884
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1885
    """Builds the node OS structures.
1886

1887
    @type ninfo: L{objects.Node}
1888
    @param ninfo: the node to check
1889
    @param nresult: the remote results for the node
1890
    @param nimg: the node image object
1891

1892
    """
1893
    node = ninfo.name
1894
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1895

    
1896
    remote_os = nresult.get(constants.NV_OSLIST, None)
1897
    test = (not isinstance(remote_os, list) or
1898
            not compat.all(isinstance(v, list) and len(v) == 7
1899
                           for v in remote_os))
1900

    
1901
    _ErrorIf(test, self.ENODEOS, node,
1902
             "node hasn't returned valid OS data")
1903

    
1904
    nimg.os_fail = test
1905

    
1906
    if test:
1907
      return
1908

    
1909
    os_dict = {}
1910

    
1911
    for (name, os_path, status, diagnose,
1912
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1913

    
1914
      if name not in os_dict:
1915
        os_dict[name] = []
1916

    
1917
      # parameters is a list of lists instead of list of tuples due to
1918
      # JSON lacking a real tuple type, fix it:
1919
      parameters = [tuple(v) for v in parameters]
1920
      os_dict[name].append((os_path, status, diagnose,
1921
                            set(variants), set(parameters), set(api_ver)))
1922

    
1923
    nimg.oslist = os_dict
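  # Illustrative sketch (assumption): each NV_OSLIST entry is a 7-element
  # list; grouping entries by OS name as done above can be written as:
  #
  #   def group_os_list(remote_os):
  #     os_dict = {}
  #     for (name, path, status, diag, variants, params, api) in remote_os:
  #       os_dict.setdefault(name, []).append(
  #         (path, status, diag, set(variants),
  #          set(tuple(v) for v in params), set(api)))
  #     return os_dict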
1924

    
1925
  def _VerifyNodeOS(self, ninfo, nimg, base):
1926
    """Verifies the node OS list.
1927

1928
    @type ninfo: L{objects.Node}
1929
    @param ninfo: the node to check
1930
    @param nimg: the node image object
1931
    @param base: the 'template' node we match against (e.g. from the master)
1932

1933
    """
1934
    node = ninfo.name
1935
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1936

    
1937
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1938

    
1939
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1940
    for os_name, os_data in nimg.oslist.items():
1941
      assert os_data, "Empty OS status for OS %s?!" % os_name
1942
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1943
      _ErrorIf(not f_status, self.ENODEOS, node,
1944
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1945
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1946
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1947
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1948
      # this will be caught in the backend too
1949
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1950
               and not f_var, self.ENODEOS, node,
1951
               "OS %s with API at least %d does not declare any variant",
1952
               os_name, constants.OS_API_V15)
1953
      # comparisons with the 'base' image
1954
      test = os_name not in base.oslist
1955
      _ErrorIf(test, self.ENODEOS, node,
1956
               "Extra OS %s not present on reference node (%s)",
1957
               os_name, base.name)
1958
      if test:
1959
        continue
1960
      assert base.oslist[os_name], "Base node has empty OS status?"
1961
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1962
      if not b_status:
1963
        # base OS is invalid, skipping
1964
        continue
1965
      for kind, a, b in [("API version", f_api, b_api),
1966
                         ("variants list", f_var, b_var),
1967
                         ("parameters", beautify_params(f_param),
1968
                          beautify_params(b_param))]:
1969
        _ErrorIf(a != b, self.ENODEOS, node,
1970
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1971
                 kind, os_name, base.name,
1972
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1973

    
1974
    # check any missing OSes
1975
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1976
    _ErrorIf(missing, self.ENODEOS, node,
1977
             "OSes present on reference node %s but missing on this node: %s",
1978
             base.name, utils.CommaJoin(missing))
1979

    
1980
  def _VerifyOob(self, ninfo, nresult):
1981
    """Verifies out of band functionality of a node.
1982

1983
    @type ninfo: L{objects.Node}
1984
    @param ninfo: the node to check
1985
    @param nresult: the remote results for the node
1986

1987
    """
1988
    node = ninfo.name
1989
    # We just have to verify the paths on master and/or master candidates
1990
    # as the oob helper is invoked on the master
1991
    if ((ninfo.master_candidate or ninfo.master_capable) and
1992
        constants.NV_OOB_PATHS in nresult):
1993
      for path_result in nresult[constants.NV_OOB_PATHS]:
1994
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1995

    
1996
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1997
    """Verifies and updates the node volume data.
1998

1999
    This function will update a L{NodeImage}'s internal structures
2000
    with data from the remote call.
2001

2002
    @type ninfo: L{objects.Node}
2003
    @param ninfo: the node to check
2004
    @param nresult: the remote results for the node
2005
    @param nimg: the node image object
2006
    @param vg_name: the configured VG name
2007

2008
    """
2009
    node = ninfo.name
2010
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2011

    
2012
    nimg.lvm_fail = True
2013
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2014
    if vg_name is None:
2015
      pass
2016
    elif isinstance(lvdata, basestring):
2017
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2018
               utils.SafeEncode(lvdata))
2019
    elif not isinstance(lvdata, dict):
2020
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2021
    else:
2022
      nimg.volumes = lvdata
2023
      nimg.lvm_fail = False
2024

    
2025
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2026
    """Verifies and updates the node instance list.
2027

2028
    If the listing was successful, then updates this node's instance
2029
    list. Otherwise, it marks the RPC call as failed for the instance
2030
    list key.
2031

2032
    @type ninfo: L{objects.Node}
2033
    @param ninfo: the node to check
2034
    @param nresult: the remote results for the node
2035
    @param nimg: the node image object
2036

2037
    """
2038
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2039
    test = not isinstance(idata, list)
2040
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2041
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2042
    if test:
2043
      nimg.hyp_fail = True
2044
    else:
2045
      nimg.instances = idata
2046

    
2047
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2048
    """Verifies and computes a node information map
2049

2050
    @type ninfo: L{objects.Node}
2051
    @param ninfo: the node to check
2052
    @param nresult: the remote results for the node
2053
    @param nimg: the node image object
2054
    @param vg_name: the configured VG name
2055

2056
    """
2057
    node = ninfo.name
2058
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2059

    
2060
    # try to read free memory (from the hypervisor)
2061
    hv_info = nresult.get(constants.NV_HVINFO, None)
2062
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2063
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2064
    if not test:
2065
      try:
2066
        nimg.mfree = int(hv_info["memory_free"])
2067
      except (ValueError, TypeError):
2068
        _ErrorIf(True, self.ENODERPC, node,
2069
                 "node returned invalid nodeinfo, check hypervisor")
2070

    
2071
    # FIXME: devise a free space model for file based instances as well
2072
    if vg_name is not None:
2073
      test = (constants.NV_VGLIST not in nresult or
2074
              vg_name not in nresult[constants.NV_VGLIST])
2075
      _ErrorIf(test, self.ENODELVM, node,
2076
               "node didn't return data for the volume group '%s'"
2077
               " - it is either missing or broken", vg_name)
2078
      if not test:
2079
        try:
2080
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2081
        except (ValueError, TypeError):
2082
          _ErrorIf(True, self.ENODERPC, node,
2083
                   "node returned invalid LVM info, check LVM status")
2084

    
2085
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2086
    """Gets per-disk status information for all instances.
2087

2088
    @type nodelist: list of strings
2089
    @param nodelist: Node names
2090
    @type node_image: dict of (name, L{objects.Node})
2091
    @param node_image: Node objects
2092
    @type instanceinfo: dict of (name, L{objects.Instance})
2093
    @param instanceinfo: Instance objects
2094
    @rtype: {instance: {node: [(success, payload)]}}
2095
    @return: a dictionary of per-instance dictionaries with nodes as
2096
        keys and disk information as values; the disk information is a
2097
        list of tuples (success, payload)
2098

2099
    """
2100
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2101

    
2102
    node_disks = {}
2103
    node_disks_devonly = {}
2104
    diskless_instances = set()
2105
    diskless = constants.DT_DISKLESS
2106

    
2107
    for nname in nodelist:
2108
      node_instances = list(itertools.chain(node_image[nname].pinst,
2109
                                            node_image[nname].sinst))
2110
      diskless_instances.update(inst for inst in node_instances
2111
                                if instanceinfo[inst].disk_template == diskless)
2112
      disks = [(inst, disk)
2113
               for inst in node_instances
2114
               for disk in instanceinfo[inst].disks]
2115

    
2116
      if not disks:
2117
        # No need to collect data
2118
        continue
2119

    
2120
      node_disks[nname] = disks
2121

    
2122
      # Creating copies as SetDiskID below will modify the objects and that can
2123
      # lead to incorrect data returned from nodes
2124
      devonly = [dev.Copy() for (_, dev) in disks]
2125

    
2126
      for dev in devonly:
2127
        self.cfg.SetDiskID(dev, nname)
2128

    
2129
      node_disks_devonly[nname] = devonly
2130

    
2131
    assert len(node_disks) == len(node_disks_devonly)
2132

    
2133
    # Collect data from all nodes with disks
2134
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2135
                                                          node_disks_devonly)
2136

    
2137
    assert len(result) == len(node_disks)
2138

    
2139
    instdisk = {}
2140

    
2141
    for (nname, nres) in result.items():
2142
      disks = node_disks[nname]
2143

    
2144
      if nres.offline:
2145
        # No data from this node
2146
        data = len(disks) * [(False, "node offline")]
2147
      else:
2148
        msg = nres.fail_msg
2149
        _ErrorIf(msg, self.ENODERPC, nname,
2150
                 "while getting disk information: %s", msg)
2151
        if msg:
2152
          # No data from this node
2153
          data = len(disks) * [(False, msg)]
2154
        else:
2155
          data = []
2156
          for idx, i in enumerate(nres.payload):
2157
            if isinstance(i, (tuple, list)) and len(i) == 2:
2158
              data.append(i)
2159
            else:
2160
              logging.warning("Invalid result from node %s, entry %d: %s",
2161
                              nname, idx, i)
2162
              data.append((False, "Invalid result from the remote node"))
2163

    
2164
      for ((inst, _), status) in zip(disks, data):
2165
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2166

    
2167
    # Add empty entries for diskless instances.
2168
    for inst in diskless_instances:
2169
      assert inst not in instdisk
2170
      instdisk[inst] = {}
2171

    
2172
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2173
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2174
                      compat.all(isinstance(s, (tuple, list)) and
2175
                                 len(s) == 2 for s in statuses)
2176
                      for inst, nnames in instdisk.items()
2177
                      for nname, statuses in nnames.items())
2178
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2179

    
2180
    return instdisk
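  # Illustrative sketch (not part of the module): pairing each (instance,
  # disk) tuple with the node's per-disk status, as done above, is a plain
  # zip over two equally long lists:
  #
  #   def pair_disk_status(disks, statuses):
  #     """disks: [(instance, disk)]; statuses: [(success, payload)]."""
  #     instdisk = {}
  #     for ((inst, _), status) in zip(disks, statuses):
  #       instdisk.setdefault(inst, []).append(status)
  #     return instdisk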
2181

    
2182
  def _VerifyHVP(self, hvp_data):
2183
    """Verifies locally the syntax of the hypervisor parameters.
2184

2185
    """
2186
    for item, hv_name, hv_params in hvp_data:
2187
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2188
             (item, hv_name))
2189
      try:
2190
        hv_class = hypervisor.GetHypervisor(hv_name)
2191
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2192
        hv_class.CheckParameterSyntax(hv_params)
2193
      except errors.GenericError, err:
2194
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2195

    
2196
  def BuildHooksEnv(self):
2197
    """Build hooks env.
2198

2199
    Cluster-Verify hooks just ran in the post phase and their failure makes
2200
    the output be logged in the verify output and the verification to fail.
2201

2202
    """
2203
    cfg = self.cfg
2204

    
2205
    env = {
2206
      "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2207
      }
2208

    
2209
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2210
               for node in cfg.GetAllNodesInfo().values())
2211

    
2212
    return env
2213

    
2214
  def BuildHooksNodes(self):
2215
    """Build hooks nodes.
2216

2217
    """
2218
    return ([], self.cfg.GetNodeList())
2219

    
2220
  def Exec(self, feedback_fn):
2221
    """Verify integrity of cluster, performing various test on nodes.
2222

2223
    """
2224
    # This method has too many local variables. pylint: disable-msg=R0914
2225
    self.bad = False
2226
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2227
    verbose = self.op.verbose
2228
    self._feedback_fn = feedback_fn
2229
    feedback_fn("* Verifying global settings")
2230
    for msg in self.cfg.VerifyConfig():
2231
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2232

    
2233
    # Check the cluster certificates
2234
    for cert_filename in constants.ALL_CERT_FILES:
2235
      (errcode, msg) = _VerifyCertificate(cert_filename)
2236
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2237

    
2238
    vg_name = self.cfg.GetVGName()
2239
    drbd_helper = self.cfg.GetDRBDHelper()
2240
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2241
    cluster = self.cfg.GetClusterInfo()
2242
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2243
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2244
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2245
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2246
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2247
                        for iname in instancelist)
2248
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2249
    i_non_redundant = [] # Non redundant instances
2250
    i_non_a_balanced = [] # Non auto-balanced instances
2251
    n_offline = 0 # Count of offline nodes
2252
    n_drained = 0 # Count of nodes being drained
2253
    node_vol_should = {}
2254

    
2255
    # FIXME: verify OS list
2256

    
2257
    # File verification
2258
    filemap = _ComputeAncillaryFiles(cluster, False)
2259

    
2260
    # do local checksums
2261
    master_node = self.master_node = self.cfg.GetMasterNode()
2262
    master_ip = self.cfg.GetMasterIP()
2263

    
2264
    # Compute the set of hypervisor parameters
2265
    hvp_data = []
2266
    for hv_name in hypervisors:
2267
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2268
    for os_name, os_hvp in cluster.os_hvp.items():
2269
      for hv_name, hv_params in os_hvp.items():
2270
        if not hv_params:
2271
          continue
2272
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2273
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
2274
    # TODO: collapse identical parameter values in a single one
2275
    for instance in instanceinfo.values():
2276
      if not instance.hvparams:
2277
        continue
2278
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2279
                       cluster.FillHV(instance)))
2280
    # and verify them locally
2281
    self._VerifyHVP(hvp_data)
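    # Illustrative sketch (assumption): the parameter sources gathered above
    # (cluster defaults, per-OS overrides, per-instance overrides) layer like
    # a chain of dict updates:
    #
    #   def fill_params(cluster_defaults, os_hvp=None, instance_hvp=None):
    #     params = dict(cluster_defaults)
    #     params.update(os_hvp or {})
    #     params.update(instance_hvp or {})
    #     return params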
2282

    
2283
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2284
    node_verify_param = {
2285
      constants.NV_FILELIST:
2286
        utils.UniqueSequence(filename
2287
                             for files in filemap
2288
                             for filename in files),
2289
      constants.NV_NODELIST: [node.name for node in nodeinfo
2290
                              if not node.offline],
2291
      constants.NV_HYPERVISOR: hypervisors,
2292
      constants.NV_HVPARAMS: hvp_data,
2293
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2294
                                  node.secondary_ip) for node in nodeinfo
2295
                                 if not node.offline],
2296
      constants.NV_INSTANCELIST: hypervisors,
2297
      constants.NV_VERSION: None,
2298
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2299
      constants.NV_NODESETUP: None,
2300
      constants.NV_TIME: None,
2301
      constants.NV_MASTERIP: (master_node, master_ip),
2302
      constants.NV_OSLIST: None,
2303
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2304
      }
2305

    
2306
    if vg_name is not None:
2307
      node_verify_param[constants.NV_VGLIST] = None
2308
      node_verify_param[constants.NV_LVLIST] = vg_name
2309
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2310
      node_verify_param[constants.NV_DRBDLIST] = None
2311

    
2312
    if drbd_helper:
2313
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2314

    
2315
    # Build our expected cluster state
2316
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2317
                                                 name=node.name,
2318
                                                 vm_capable=node.vm_capable))
2319
                      for node in nodeinfo)
2320

    
2321
    # Gather OOB paths
2322
    oob_paths = []
2323
    for node in nodeinfo:
2324
      path = _SupportsOob(self.cfg, node)
2325
      if path and path not in oob_paths:
2326
        oob_paths.append(path)
2327

    
2328
    if oob_paths:
2329
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2330

    
2331
    for instance in instancelist:
2332
      inst_config = instanceinfo[instance]
2333

    
2334
      for nname in inst_config.all_nodes:
2335
        if nname not in node_image:
2336
          # ghost node
2337
          gnode = self.NodeImage(name=nname)
2338
          gnode.ghost = True
2339
          node_image[nname] = gnode
2340

    
2341
      inst_config.MapLVsByNode(node_vol_should)
2342

    
2343
      pnode = inst_config.primary_node
2344
      node_image[pnode].pinst.append(instance)
2345

    
2346
      for snode in inst_config.secondary_nodes:
2347
        nimg = node_image[snode]
2348
        nimg.sinst.append(instance)
2349
        if pnode not in nimg.sbp:
2350
          nimg.sbp[pnode] = []
2351
        nimg.sbp[pnode].append(instance)
2352

    
2353
    # At this point, we have the in-memory data structures complete,
2354
    # except for the runtime information, which we'll gather next
2355

    
2356
    # Due to the way our RPC system works, exact response times cannot be
2357
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2358
    # time before and after executing the request, we can at least have a time
2359
    # window.
2360
    nvinfo_starttime = time.time()
2361
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2362
                                           self.cfg.GetClusterName())
2363
    nvinfo_endtime = time.time()
2364

    
2365
    all_drbd_map = self.cfg.ComputeDRBDMap()
2366

    
2367
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2368
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2369

    
2370
    feedback_fn("* Verifying configuration file consistency")
2371
    self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2372

    
2373
    feedback_fn("* Verifying node status")
2374

    
2375
    refos_img = None
2376

    
2377
    for node_i in nodeinfo:
2378
      node = node_i.name
2379
      nimg = node_image[node]
2380

    
2381
      if node_i.offline:
2382
        if verbose:
2383
          feedback_fn("* Skipping offline node %s" % (node,))
2384
        n_offline += 1
2385
        continue
2386

    
2387
      if node == master_node:
2388
        ntype = "master"
2389
      elif node_i.master_candidate:
2390
        ntype = "master candidate"
2391
      elif node_i.drained:
2392
        ntype = "drained"
2393
        n_drained += 1
2394
      else:
2395
        ntype = "regular"
2396
      if verbose:
2397
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2398

    
2399
      msg = all_nvinfo[node].fail_msg
2400
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2401
      if msg:
2402
        nimg.rpc_fail = True
2403
        continue
2404

    
2405
      nresult = all_nvinfo[node].payload
2406

    
2407
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2408
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2409
      self._VerifyNodeNetwork(node_i, nresult)
2410
      self._VerifyOob(node_i, nresult)
2411

    
2412
      if nimg.vm_capable:
2413
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2414
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2415
                             all_drbd_map)
2416

    
2417
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2418
        self._UpdateNodeInstances(node_i, nresult, nimg)
2419
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2420
        self._UpdateNodeOS(node_i, nresult, nimg)
2421
        if not nimg.os_fail:
2422
          if refos_img is None:
2423
            refos_img = nimg
2424
          self._VerifyNodeOS(node_i, nimg, refos_img)
2425

    
2426
    feedback_fn("* Verifying instance status")
2427
    for instance in instancelist:
2428
      if verbose:
2429
        feedback_fn("* Verifying instance %s" % instance)
2430
      inst_config = instanceinfo[instance]
2431
      self._VerifyInstance(instance, inst_config, node_image,
2432
                           instdisk[instance])
2433
      inst_nodes_offline = []
2434

    
2435
      pnode = inst_config.primary_node
2436
      pnode_img = node_image[pnode]
2437
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2438
               self.ENODERPC, pnode, "instance %s, connection to"
2439
               " primary node failed", instance)
2440

    
2441
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2442
               self.EINSTANCEBADNODE, instance,
2443
               "instance is marked as running and lives on offline node %s",
2444
               inst_config.primary_node)
2445

    
2446
      # If the instance is non-redundant we cannot survive losing its primary
2447
      # node, so we are not N+1 compliant. On the other hand we have no disk
2448
      # templates with more than one secondary so that situation is not well
2449
      # supported either.
2450
      # FIXME: does not support file-backed instances
2451
      if not inst_config.secondary_nodes:
2452
        i_non_redundant.append(instance)
2453

    
2454
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2455
               instance, "instance has multiple secondary nodes: %s",
2456
               utils.CommaJoin(inst_config.secondary_nodes),
2457
               code=self.ETYPE_WARNING)
2458

    
2459
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2460
        pnode = inst_config.primary_node
2461
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2462
        instance_groups = {}
2463

    
2464
        for node in instance_nodes:
2465
          instance_groups.setdefault(nodeinfo_byname[node].group,
2466
                                     []).append(node)
2467

    
2468
        pretty_list = [
2469
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2470
          # Sort so that we always list the primary node first.
2471
          for group, nodes in sorted(instance_groups.items(),
2472
                                     key=lambda (_, nodes): pnode in nodes,
2473
                                     reverse=True)]
2474

    
2475
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2476
                      instance, "instance has primary and secondary nodes in"
2477
                      " different groups: %s", utils.CommaJoin(pretty_list),
2478
                      code=self.ETYPE_WARNING)
2479

    
2480
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2481
        i_non_a_balanced.append(instance)
2482

    
2483
      for snode in inst_config.secondary_nodes:
2484
        s_img = node_image[snode]
2485
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2486
                 "instance %s, connection to secondary node failed", instance)
2487

    
2488
        if s_img.offline:
2489
          inst_nodes_offline.append(snode)
2490

    
2491
      # warn that the instance lives on offline nodes
2492
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2493
               "instance has offline secondary node(s) %s",
2494
               utils.CommaJoin(inst_nodes_offline))
2495
      # ... or ghost/non-vm_capable nodes
2496
      for node in inst_config.all_nodes:
2497
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2498
                 "instance lives on ghost node %s", node)
2499
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2500
                 instance, "instance lives on non-vm_capable node %s", node)
2501

    
2502
    feedback_fn("* Verifying orphan volumes")
2503
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2504
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2505

    
2506
    feedback_fn("* Verifying orphan instances")
2507
    self._VerifyOrphanInstances(instancelist, node_image)
2508

    
2509
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2510
      feedback_fn("* Verifying N+1 Memory redundancy")
2511
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2512

    
2513
    feedback_fn("* Other Notes")
2514
    if i_non_redundant:
2515
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2516
                  % len(i_non_redundant))
2517

    
2518
    if i_non_a_balanced:
2519
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2520
                  % len(i_non_a_balanced))
2521

    
2522
    if n_offline:
2523
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2524

    
2525
    if n_drained:
2526
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2527

    
2528
    return not self.bad
2529

    
2530
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2531
    """Analyze the post-hooks' result
2532

2533
    This method analyses the hook result, handles it, and sends some
2534
    nicely-formatted feedback back to the user.
2535

2536
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2537
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2538
    @param hooks_results: the results of the multi-node hooks rpc call
2539
    @param feedback_fn: function used to send feedback back to the caller
2540
    @param lu_result: previous Exec result
2541
    @return: the new Exec result, based on the previous result
2542
        and hook results
2543

2544
    """
2545
    # We only really run POST phase hooks, and are only interested in
2546
    # their results
2547
    if phase == constants.HOOKS_PHASE_POST:
2548
      # Used to change hooks' output to proper indentation
2549
      feedback_fn("* Hooks Results")
2550
      assert hooks_results, "invalid result from hooks"
2551

    
2552
      for node_name in hooks_results:
2553
        res = hooks_results[node_name]
2554
        msg = res.fail_msg
2555
        test = msg and not res.offline
2556
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2557
                      "Communication failure in hooks execution: %s", msg)
2558
        if res.offline or msg:
2559
          # No need to investigate payload if node is offline or gave an error.
2560
          # manually override lu_result here, as _ErrorIf only
2561
          # overrides self.bad
2562
          lu_result = 1
2563
          continue
2564
        for script, hkr, output in res.payload:
2565
          test = hkr == constants.HKR_FAIL
2566
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2567
                        "Script %s failed, output:", script)
2568
          if test:
2569
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2570
            feedback_fn("%s" % output)
2571
            lu_result = 0
2572

    
2573
      return lu_result
2574

    
2575

    
2576
class LUClusterVerifyDisks(NoHooksLU):
2577
  """Verifies the cluster disks status.
2578

2579
  """
2580
  REQ_BGL = False
2581

    
2582
  def ExpandNames(self):
2583
    self.needed_locks = {
2584
      locking.LEVEL_NODE: locking.ALL_SET,
2585
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2586
    }
2587
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2588

    
2589
  def Exec(self, feedback_fn):
2590
    """Verify integrity of cluster disks.
2591

2592
    @rtype: tuple of three items
2593
    @return: a tuple of (dict of node-to-node_error, list of instances
2594
        which need activate-disks, dict of instance: (node, volume) for
2595
        missing volumes)
2596

2597
    """
2598
    result = res_nodes, res_instances, res_missing = {}, [], {}
2599

    
2600
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2601
    instances = self.cfg.GetAllInstancesInfo().values()
2602

    
2603
    nv_dict = {}
2604
    for inst in instances:
2605
      inst_lvs = {}
2606
      if not inst.admin_up:
2607
        continue
2608
      inst.MapLVsByNode(inst_lvs)
2609
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2610
      for node, vol_list in inst_lvs.iteritems():
2611
        for vol in vol_list:
2612
          nv_dict[(node, vol)] = inst
2613

    
2614
    if not nv_dict:
2615
      return result
2616

    
2617
    node_lvs = self.rpc.call_lv_list(nodes, [])
2618
    for node, node_res in node_lvs.items():
2619
      if node_res.offline:
2620
        continue
2621
      msg = node_res.fail_msg
2622
      if msg:
2623
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2624
        res_nodes[node] = msg
2625
        continue
2626

    
2627
      lvs = node_res.payload
2628
      for lv_name, (_, _, lv_online) in lvs.items():
2629
        inst = nv_dict.pop((node, lv_name), None)
2630
        if (not lv_online and inst is not None
2631
            and inst.name not in res_instances):
2632
          res_instances.append(inst.name)
2633

    
2634
    # any leftover items in nv_dict are missing LVs, let's arrange the
2635
    # data better
2636
    for key, inst in nv_dict.iteritems():
2637
      if inst.name not in res_missing:
2638
        res_missing[inst.name] = []
2639
      res_missing[inst.name].append(key)
2640

    
2641
    return result
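  # Illustrative sketch (not part of the module): the transformation noted
  # above, from {iname: {node: [vol, ...]}} to {(node, vol): iname}, can be
  # written as:
  #
  #   def invert_lv_map(lvs_by_instance):
  #     nv_dict = {}
  #     for (iname, by_node) in lvs_by_instance.items():
  #       for (node, vols) in by_node.items():
  #         for vol in vols:
  #           nv_dict[(node, vol)] = iname
  #     return nv_dict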
2642

    
2643

    
2644
class LUClusterRepairDiskSizes(NoHooksLU):
2645
  """Verifies the cluster disks sizes.
2646

2647
  """
2648
  REQ_BGL = False
2649

    
2650
  def ExpandNames(self):
2651
    if self.op.instances:
2652
      self.wanted_names = []
2653
      for name in self.op.instances:
2654
        full_name = _ExpandInstanceName(self.cfg, name)
2655
        self.wanted_names.append(full_name)
2656
      self.needed_locks = {
2657
        locking.LEVEL_NODE: [],
2658
        locking.LEVEL_INSTANCE: self.wanted_names,
2659
        }
2660
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2661
    else:
2662
      self.wanted_names = None
2663
      self.needed_locks = {
2664
        locking.LEVEL_NODE: locking.ALL_SET,
2665
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2666
        }
2667
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2668

    
2669
  def DeclareLocks(self, level):
2670
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2671
      self._LockInstancesNodes(primary_only=True)
2672

    
2673
  def CheckPrereq(self):
2674
    """Check prerequisites.
2675

2676
    This only checks the optional instance list against the existing names.
2677

2678
    """
2679
    if self.wanted_names is None:
2680
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2681

    
2682
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2683
                             in self.wanted_names]
2684

    
2685
  def _EnsureChildSizes(self, disk):
2686
    """Ensure children of the disk have the needed disk size.
2687

2688
    This is valid mainly for DRBD8 and fixes an issue where the
2689
    children have smaller disk size.
2690

2691
    @param disk: an L{ganeti.objects.Disk} object
2692

2693
    """
2694
    if disk.dev_type == constants.LD_DRBD8:
2695
      assert disk.children, "Empty children for DRBD8?"
2696
      fchild = disk.children[0]
2697
      mismatch = fchild.size < disk.size
2698
      if mismatch:
2699
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2700
                     fchild.size, disk.size)
2701
        fchild.size = disk.size
2702

    
2703
      # and we recurse on this child only, not on the metadev
2704
      return self._EnsureChildSizes(fchild) or mismatch
2705
    else:
2706
      return False
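  # Illustrative sketch (assumption, ignoring the DRBD-only guard above): the
  # recursive size fix-up on a minimal stand-in disk object with 'size' and
  # 'children' attributes.
  #
  #   def grow_children(disk):
  #     """Grow the first-child chain to the parent size; report changes."""
  #     if not disk.children:
  #       return False
  #     child = disk.children[0]
  #     changed = child.size < disk.size
  #     if changed:
  #       child.size = disk.size
  #     return grow_children(child) or changed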
2707

    
2708
  def Exec(self, feedback_fn):
2709
    """Verify the size of cluster disks.
2710

2711
    """
2712
    # TODO: check child disks too
2713
    # TODO: check differences in size between primary/secondary nodes
2714
    per_node_disks = {}
2715
    for instance in self.wanted_instances:
2716
      pnode = instance.primary_node
2717
      if pnode not in per_node_disks:
2718
        per_node_disks[pnode] = []
2719
      for idx, disk in enumerate(instance.disks):
2720
        per_node_disks[pnode].append((instance, idx, disk))
2721

    
2722
    changed = []
2723
    for node, dskl in per_node_disks.items():
2724
      newl = [v[2].Copy() for v in dskl]
2725
      for dsk in newl:
2726
        self.cfg.SetDiskID(dsk, node)
2727
      result = self.rpc.call_blockdev_getsize(node, newl)
2728
      if result.fail_msg:
2729
        self.LogWarning("Failure in blockdev_getsize call to node"
2730
                        " %s, ignoring", node)
2731
        continue
2732
      if len(result.payload) != len(dskl):
2733
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2734
                        " result.payload=%s", node, len(dskl), result.payload)
2735
        self.LogWarning("Invalid result from node %s, ignoring node results",
2736
                        node)
2737
        continue
2738
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
2739
        if size is None:
2740
          self.LogWarning("Disk %d of instance %s did not return size"
2741
                          " information, ignoring", idx, instance.name)
2742
          continue
2743
        if not isinstance(size, (int, long)):
2744
          self.LogWarning("Disk %d of instance %s did not return valid"
2745
                          " size information, ignoring", idx, instance.name)
2746
          continue
2747
        size = size >> 20
2748
        if size != disk.size:
2749
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2750
                       " correcting: recorded %d, actual %d", idx,
2751
                       instance.name, disk.size, size)
2752
          disk.size = size
2753
          self.cfg.Update(instance, feedback_fn)
2754
          changed.append((instance.name, idx, size))
2755
        if self._EnsureChildSizes(disk):
2756
          self.cfg.Update(instance, feedback_fn)
2757
          changed.append((instance.name, idx, disk.size))
2758
    return changed
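  # Illustrative note (assumption): blockdev_getsize is expected to return
  # sizes in bytes, hence the >> 20 above to convert to the MiB unit used in
  # disk.size, e.g.:
  #
  #   size_bytes = 10737418240     # 10 GiB reported by a node
  #   size_mib = size_bytes >> 20  # == 10240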
2759

    
2760

    
2761
class LUClusterRename(LogicalUnit):
2762
  """Rename the cluster.
2763

2764
  """
2765
  HPATH = "cluster-rename"
2766
  HTYPE = constants.HTYPE_CLUSTER
2767

    
2768
  def BuildHooksEnv(self):
2769
    """Build hooks env.
2770

2771
    """
2772
    return {
2773
      "OP_TARGET": self.cfg.GetClusterName(),
2774
      "NEW_NAME": self.op.name,
2775
      }
2776

    
2777
  def BuildHooksNodes(self):
2778
    """Build hooks nodes.
2779

2780
    """
2781
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2782

    
2783
  def CheckPrereq(self):
2784
    """Verify that the passed name is a valid one.
2785

2786
    """
2787
    hostname = netutils.GetHostname(name=self.op.name,
2788
                                    family=self.cfg.GetPrimaryIPFamily())
2789

    
2790
    new_name = hostname.name
2791
    self.ip = new_ip = hostname.ip
2792
    old_name = self.cfg.GetClusterName()
2793
    old_ip = self.cfg.GetMasterIP()
2794
    if new_name == old_name and new_ip == old_ip:
2795
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2796
                                 " cluster has changed",
2797
                                 errors.ECODE_INVAL)
2798
    if new_ip != old_ip:
2799
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2800
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2801
                                   " reachable on the network" %
2802
                                   new_ip, errors.ECODE_NOTUNIQUE)
2803

    
2804
    self.op.name = new_name
2805

    
2806
  def Exec(self, feedback_fn):
2807
    """Rename the cluster.
2808

2809
    """
2810
    clustername = self.op.name
2811
    ip = self.ip
2812

    
2813
    # shutdown the master IP
2814
    master = self.cfg.GetMasterNode()
2815
    result = self.rpc.call_node_stop_master(master, False)
2816
    result.Raise("Could not disable the master role")
2817

    
2818
    try:
2819
      cluster = self.cfg.GetClusterInfo()
2820
      cluster.cluster_name = clustername
2821
      cluster.master_ip = ip
2822
      self.cfg.Update(cluster, feedback_fn)
2823

    
2824
      # update the known hosts file
2825
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2826
      node_list = self.cfg.GetOnlineNodeList()
2827
      try:
2828
        node_list.remove(master)
2829
      except ValueError:
2830
        pass
2831
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2832
    finally:
2833
      result = self.rpc.call_node_start_master(master, False, False)
2834
      msg = result.fail_msg
2835
      if msg:
2836
        self.LogWarning("Could not re-enable the master role on"
2837
                        " the master, please restart manually: %s", msg)
2838

    
2839
    return clustername
2840

    
2841

    
2842
class LUClusterSetParams(LogicalUnit):
2843
  """Change the parameters of the cluster.
2844

2845
  """
2846
  HPATH = "cluster-modify"
2847
  HTYPE = constants.HTYPE_CLUSTER
2848
  REQ_BGL = False
2849

    
2850
  def CheckArguments(self):
2851
    """Check parameters
2852

2853
    """
2854
    if self.op.uid_pool:
2855
      uidpool.CheckUidPool(self.op.uid_pool)
2856

    
2857
    if self.op.add_uids:
2858
      uidpool.CheckUidPool(self.op.add_uids)
2859

    
2860
    if self.op.remove_uids:
2861
      uidpool.CheckUidPool(self.op.remove_uids)
2862

    
2863
  def ExpandNames(self):
2864
    # FIXME: in the future maybe other cluster params won't require checking on
2865
    # all nodes to be modified.
2866
    self.needed_locks = {
2867
      locking.LEVEL_NODE: locking.ALL_SET,
2868
    }
2869
    self.share_locks[locking.LEVEL_NODE] = 1
2870

    
2871
  def BuildHooksEnv(self):
2872
    """Build hooks env.
2873

2874
    """
2875
    return {
2876
      "OP_TARGET": self.cfg.GetClusterName(),
2877
      "NEW_VG_NAME": self.op.vg_name,
2878
      }
2879

    
2880
  def BuildHooksNodes(self):
2881
    """Build hooks nodes.
2882

2883
    """
2884
    mn = self.cfg.GetMasterNode()
2885
    return ([mn], [mn])
2886

    
2887
  def CheckPrereq(self):
2888
    """Check prerequisites.
2889

2890
    This checks whether the given params don't conflict and
2891
    if the given volume group is valid.
2892

2893
    """
2894
    if self.op.vg_name is not None and not self.op.vg_name:
2895
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2896
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2897
                                   " instances exist", errors.ECODE_INVAL)
2898

    
2899
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2900
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2901
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2902
                                   " drbd-based instances exist",
2903
                                   errors.ECODE_INVAL)
2904

    
2905
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
2906

    
2907
    # if vg_name not None, checks given volume group on all nodes
2908
    if self.op.vg_name:
2909
      vglist = self.rpc.call_vg_list(node_list)
2910
      for node in node_list:
2911
        msg = vglist[node].fail_msg
2912
        if msg:
2913
          # ignoring down node
2914
          self.LogWarning("Error while gathering data on node %s"
2915
                          " (ignoring node): %s", node, msg)
2916
          continue
2917
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2918
                                              self.op.vg_name,
2919
                                              constants.MIN_VG_SIZE)
2920
        if vgstatus:
2921
          raise errors.OpPrereqError("Error on node '%s': %s" %
2922
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2923

    
2924
    if self.op.drbd_helper:
2925
      # checks given drbd helper on all nodes
2926
      helpers = self.rpc.call_drbd_helper(node_list)
2927
      for node in node_list:
2928
        ninfo = self.cfg.GetNodeInfo(node)
2929
        if ninfo.offline:
2930
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2931
          continue
2932
        msg = helpers[node].fail_msg
2933
        if msg:
2934
          raise errors.OpPrereqError("Error checking drbd helper on node"
2935
                                     " '%s': %s" % (node, msg),
2936
                                     errors.ECODE_ENVIRON)
2937
        node_helper = helpers[node].payload
2938
        if node_helper != self.op.drbd_helper:
2939
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2940
                                     (node, node_helper), errors.ECODE_ENVIRON)
2941

    
2942
    self.cluster = cluster = self.cfg.GetClusterInfo()
2943
    # validate params changes
2944
    if self.op.beparams:
2945
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2946
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2947

    
2948
    if self.op.ndparams:
2949
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2950
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2951

    
2952
      # TODO: we need a more general way to handle resetting
2953
      # cluster-level parameters to default values
2954
      if self.new_ndparams["oob_program"] == "":
2955
        self.new_ndparams["oob_program"] = \
2956
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2957

    
2958
    if self.op.nicparams:
2959
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2960
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2961
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2962
      nic_errors = []
2963

    
2964
      # check all instances for consistency
2965
      for instance in self.cfg.GetAllInstancesInfo().values():
2966
        for nic_idx, nic in enumerate(instance.nics):
2967
          params_copy = copy.deepcopy(nic.nicparams)
2968
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2969

    
2970
          # check parameter syntax
2971
          try:
2972
            objects.NIC.CheckParameterSyntax(params_filled)
2973
          except errors.ConfigurationError, err:
2974
            nic_errors.append("Instance %s, nic/%d: %s" %
2975
                              (instance.name, nic_idx, err))
2976

    
2977
          # if we're moving instances to routed, check that they have an ip
2978
          target_mode = params_filled[constants.NIC_MODE]
2979
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2980
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2981
                              (instance.name, nic_idx))
2982
      if nic_errors:
2983
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2984
                                   "\n".join(nic_errors))
2985

    
2986
    # hypervisor list/parameters
2987
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2988
    if self.op.hvparams:
2989
      for hv_name, hv_dict in self.op.hvparams.items():
2990
        if hv_name not in self.new_hvparams:
2991
          self.new_hvparams[hv_name] = hv_dict
2992
        else:
2993
          self.new_hvparams[hv_name].update(hv_dict)
2994

    
2995
    # os hypervisor parameters
2996
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2997
    if self.op.os_hvp:
2998
      for os_name, hvs in self.op.os_hvp.items():
2999
        if os_name not in self.new_os_hvp:
3000
          self.new_os_hvp[os_name] = hvs
3001
        else:
3002
          for hv_name, hv_dict in hvs.items():
3003
            if hv_name not in self.new_os_hvp[os_name]:
3004
              self.new_os_hvp[os_name][hv_name] = hv_dict
3005
            else:
3006
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3007

    
3008
    # os parameters
3009
    self.new_osp = objects.FillDict(cluster.osparams, {})
3010
    if self.op.osparams:
3011
      for os_name, osp in self.op.osparams.items():
3012
        if os_name not in self.new_osp:
3013
          self.new_osp[os_name] = {}
3014

    
3015
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3016
                                                  use_none=True)
3017

    
3018
        if not self.new_osp[os_name]:
3019
          # we removed all parameters
3020
          del self.new_osp[os_name]
3021
        else:
3022
          # check the parameter validity (remote check)
3023
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3024
                         os_name, self.new_osp[os_name])
3025

    
3026
    # changes to the hypervisor list
3027
    if self.op.enabled_hypervisors is not None:
3028
      self.hv_list = self.op.enabled_hypervisors
3029
      for hv in self.hv_list:
3030
        # if the hypervisor doesn't already exist in the cluster
3031
        # hvparams, we initialize it to empty, and then (in both
3032
        # cases) we make sure to fill the defaults, as we might not
3033
        # have a complete defaults list if the hypervisor wasn't
3034
        # enabled before
3035
        if hv not in new_hvp:
3036
          new_hvp[hv] = {}
3037
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3038
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3039
    else:
3040
      self.hv_list = cluster.enabled_hypervisors
3041

    
3042
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3043
      # either the enabled list has changed, or the parameters have, validate
3044
      for hv_name, hv_params in self.new_hvparams.items():
3045
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3046
            (self.op.enabled_hypervisors and
3047
             hv_name in self.op.enabled_hypervisors)):
3048
          # either this is a new hypervisor, or its parameters have changed
3049
          hv_class = hypervisor.GetHypervisor(hv_name)
3050
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3051
          hv_class.CheckParameterSyntax(hv_params)
3052
          _CheckHVParams(self, node_list, hv_name, hv_params)
3053

    
3054
    if self.op.os_hvp:
3055
      # no need to check any newly-enabled hypervisors, since the
3056
      # defaults have already been checked in the above code-block
3057
      for os_name, os_hvp in self.new_os_hvp.items():
3058
        for hv_name, hv_params in os_hvp.items():
3059
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3060
          # we need to fill in the new os_hvp on top of the actual hv_p
3061
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3062
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3063
          hv_class = hypervisor.GetHypervisor(hv_name)
3064
          hv_class.CheckParameterSyntax(new_osp)
3065
          _CheckHVParams(self, node_list, hv_name, new_osp)
3066

    
3067
    if self.op.default_iallocator:
3068
      alloc_script = utils.FindFile(self.op.default_iallocator,
3069
                                    constants.IALLOCATOR_SEARCH_PATH,
3070
                                    os.path.isfile)
3071
      if alloc_script is None:
3072
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3073
                                   " specified" % self.op.default_iallocator,
3074
                                   errors.ECODE_INVAL)
3075

    
3076
  def Exec(self, feedback_fn):
3077
    """Change the parameters of the cluster.
3078

3079
    """
3080
    if self.op.vg_name is not None:
3081
      new_volume = self.op.vg_name
3082
      if not new_volume:
3083
        new_volume = None
3084
      if new_volume != self.cfg.GetVGName():
3085
        self.cfg.SetVGName(new_volume)
3086
      else:
3087
        feedback_fn("Cluster LVM configuration already in desired"
3088
                    " state, not changing")
3089
    if self.op.drbd_helper is not None:
3090
      new_helper = self.op.drbd_helper
3091
      if not new_helper:
3092
        new_helper = None
3093
      if new_helper != self.cfg.GetDRBDHelper():
3094
        self.cfg.SetDRBDHelper(new_helper)
3095
      else:
3096
        feedback_fn("Cluster DRBD helper already in desired state,"
3097
                    " not changing")
3098
    if self.op.hvparams:
3099
      self.cluster.hvparams = self.new_hvparams
3100
    if self.op.os_hvp:
3101
      self.cluster.os_hvp = self.new_os_hvp
3102
    if self.op.enabled_hypervisors is not None:
3103
      self.cluster.hvparams = self.new_hvparams
3104
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3105
    if self.op.beparams:
3106
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3107
    if self.op.nicparams:
3108
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3109
    if self.op.osparams:
3110
      self.cluster.osparams = self.new_osp
3111
    if self.op.ndparams:
3112
      self.cluster.ndparams = self.new_ndparams
3113

    
3114
    if self.op.candidate_pool_size is not None:
3115
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3116
      # we need to update the pool size here, otherwise the save will fail
3117
      _AdjustCandidatePool(self, [])
3118

    
3119
    if self.op.maintain_node_health is not None:
3120
      self.cluster.maintain_node_health = self.op.maintain_node_health
3121

    
3122
    if self.op.prealloc_wipe_disks is not None:
3123
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3124

    
3125
    if self.op.add_uids is not None:
3126
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3127

    
3128
    if self.op.remove_uids is not None:
3129
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3130

    
3131
    if self.op.uid_pool is not None:
3132
      self.cluster.uid_pool = self.op.uid_pool
3133

    
3134
    if self.op.default_iallocator is not None:
3135
      self.cluster.default_iallocator = self.op.default_iallocator
3136

    
3137
    if self.op.reserved_lvs is not None:
3138
      self.cluster.reserved_lvs = self.op.reserved_lvs
3139

    
3140
    def helper_os(aname, mods, desc):
3141
      desc += " OS list"
3142
      lst = getattr(self.cluster, aname)
3143
      for key, val in mods:
3144
        if key == constants.DDM_ADD:
3145
          if val in lst:
3146
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3147
          else:
3148
            lst.append(val)
3149
        elif key == constants.DDM_REMOVE:
3150
          if val in lst:
3151
            lst.remove(val)
3152
          else:
3153
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3154
        else:
3155
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3156

    
3157
    if self.op.hidden_os:
3158
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3159

    
3160
    if self.op.blacklisted_os:
3161
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3162

    
3163
    if self.op.master_netdev:
3164
      master = self.cfg.GetMasterNode()
3165
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3166
                  self.cluster.master_netdev)
3167
      result = self.rpc.call_node_stop_master(master, False)
3168
      result.Raise("Could not disable the master ip")
3169
      feedback_fn("Changing master_netdev from %s to %s" %
3170
                  (self.cluster.master_netdev, self.op.master_netdev))
3171
      self.cluster.master_netdev = self.op.master_netdev
3172

    
3173
    self.cfg.Update(self.cluster, feedback_fn)
3174

    
3175
    if self.op.master_netdev:
3176
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3177
                  self.op.master_netdev)
3178
      result = self.rpc.call_node_start_master(master, False, False)
3179
      if result.fail_msg:
3180
        self.LogWarning("Could not re-enable the master ip on"
3181
                        " the master, please restart manually: %s",
3182
                        result.fail_msg)
3183

    
3184

    
3185
def _UploadHelper(lu, nodes, fname):
3186
  """Helper for uploading a file and showing warnings.
3187

3188
  """
3189
  if os.path.exists(fname):
3190
    result = lu.rpc.call_upload_file(nodes, fname)
3191
    for to_node, to_result in result.items():
3192
      msg = to_result.fail_msg
3193
      if msg:
3194
        msg = ("Copy of file %s to node %s failed: %s" %
3195
               (fname, to_node, msg))
3196
        lu.proc.LogWarning(msg)
3197

    
3198

    
3199
def _ComputeAncillaryFiles(cluster, redist):
3200
  """Compute files external to Ganeti which need to be consistent.
3201

3202
  @type redist: boolean
3203
  @param redist: Whether to include files which need to be redistributed
3204

3205
  """
3206
  # Compute files for all nodes
3207
  files_all = set([
3208
    constants.SSH_KNOWN_HOSTS_FILE,
3209
    constants.CONFD_HMAC_KEY,
3210
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3211
    ])
3212

    
3213
  if not redist:
3214
    files_all.update(constants.ALL_CERT_FILES)
3215
    files_all.update(ssconf.SimpleStore().GetFileList())
3216

    
3217
  if cluster.modify_etc_hosts:
3218
    files_all.add(constants.ETC_HOSTS)
3219

    
3220
  # Files which must either exist on all nodes or on none
3221
  files_all_opt = set([
3222
    constants.RAPI_USERS_FILE,
3223
    ])
3224

    
3225
  # Files which should only be on master candidates
3226
  files_mc = set()
3227
  if not redist:
3228
    files_mc.add(constants.CLUSTER_CONF_FILE)
3229

    
3230
  # Files which should only be on VM-capable nodes
3231
  files_vm = set(filename
3232
    for hv_name in cluster.enabled_hypervisors
3233
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3234

    
3235
  # Filenames must be unique
3236
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3237
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3238
         "Found file listed in more than one file list"
3239

    
3240
  return (files_all, files_all_opt, files_mc, files_vm)
3241

    
3242

    
3243
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3244
  """Distribute additional files which are part of the cluster configuration.
3245

3246
  ConfigWriter takes care of distributing the config and ssconf files, but
3247
  there are more files which should be distributed to all nodes. This function
3248
  makes sure those are copied.
3249

3250
  @param lu: calling logical unit
3251
  @param additional_nodes: list of nodes not in the config to distribute to
3252
  @type additional_vm: boolean
3253
  @param additional_vm: whether the additional nodes are vm-capable or not
3254

3255
  """
3256
  # Gather target nodes
3257
  cluster = lu.cfg.GetClusterInfo()
3258
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3259

    
3260
  online_nodes = lu.cfg.GetOnlineNodeList()
3261
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3262

    
3263
  if additional_nodes is not None:
3264
    online_nodes.extend(additional_nodes)
3265
    if additional_vm:
3266
      vm_nodes.extend(additional_nodes)
3267

    
3268
  # Never distribute to master node
3269
  for nodelist in [online_nodes, vm_nodes]:
3270
    if master_info.name in nodelist:
3271
      nodelist.remove(master_info.name)
3272

    
3273
  # Gather file lists
3274
  (files_all, files_all_opt, files_mc, files_vm) = \
3275
    _ComputeAncillaryFiles(cluster, True)
3276

    
3277
  # Never re-distribute configuration file from here
3278
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3279
              constants.CLUSTER_CONF_FILE in files_vm)
3280
  assert not files_mc, "Master candidates not handled in this function"
3281

    
3282
  filemap = [
3283
    (online_nodes, files_all),
3284
    (online_nodes, files_all_opt),
3285
    (vm_nodes, files_vm),
3286
    ]
3287

    
3288
  # Upload the files
3289
  for (node_list, files) in filemap:
3290
    for fname in files:
3291
      _UploadHelper(lu, node_list, fname)
3292

    
3293

    
3294
class LUClusterRedistConf(NoHooksLU):
3295
  """Force the redistribution of cluster configuration.
3296

3297
  This is a very simple LU.
3298

3299
  """
3300
  REQ_BGL = False
3301

    
3302
  def ExpandNames(self):
3303
    self.needed_locks = {
3304
      locking.LEVEL_NODE: locking.ALL_SET,
3305
    }
3306
    self.share_locks[locking.LEVEL_NODE] = 1
3307

    
3308
  def Exec(self, feedback_fn):
3309
    """Redistribute the configuration.
3310

3311
    """
3312
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3313
    _RedistributeAncillaryFiles(self)
3314

    
3315

    
3316
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3317
  """Sleep and poll for an instance's disk to sync.
3318

3319
  """
3320
  if not instance.disks or disks is not None and not disks:
3321
    return True
3322

    
3323
  disks = _ExpandCheckDisks(instance, disks)
3324

    
3325
  if not oneshot:
3326
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3327

    
3328
  node = instance.primary_node
3329

    
3330
  for dev in disks:
3331
    lu.cfg.SetDiskID(dev, node)
3332

    
3333
  # TODO: Convert to utils.Retry
3334

    
3335
  retries = 0
3336
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3337
  while True:
3338
    max_time = 0
3339
    done = True
3340
    cumul_degraded = False
3341
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3342
    msg = rstats.fail_msg
3343
    if msg:
3344
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3345
      retries += 1
3346
      if retries >= 10:
3347
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3348
                                 " aborting." % node)
3349
      time.sleep(6)
3350
      continue
3351
    rstats = rstats.payload
3352
    retries = 0
3353
    for i, mstat in enumerate(rstats):
3354
      if mstat is None:
3355
        lu.LogWarning("Can't compute data for node %s/%s",
3356
                           node, disks[i].iv_name)
3357
        continue
3358

    
3359
      cumul_degraded = (cumul_degraded or
3360
                        (mstat.is_degraded and mstat.sync_percent is None))
3361
      if mstat.sync_percent is not None:
3362
        done = False
3363
        if mstat.estimated_time is not None:
3364
          rem_time = ("%s remaining (estimated)" %
3365
                      utils.FormatSeconds(mstat.estimated_time))
3366
          max_time = mstat.estimated_time
3367
        else:
3368
          rem_time = "no time estimate"
3369
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3370
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3371

    
3372
    # if we're done but degraded, let's do a few small retries, to
3373
    # make sure we see a stable and not transient situation; therefore
3374
    # we force restart of the loop
3375
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3376
      logging.info("Degraded disks found, %d retries left", degr_retries)
3377
      degr_retries -= 1
3378
      time.sleep(1)
3379
      continue
3380

    
3381
    if done or oneshot:
3382
      break
3383

    
3384
    time.sleep(min(60, max_time))
3385

    
3386
  if done:
3387
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3388
  return not cumul_degraded
3389

    
3390

    
3391
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3392
  """Check that mirrors are not degraded.
3393

3394
  The ldisk parameter, if True, will change the test from the
3395
  is_degraded attribute (which represents overall non-ok status for
3396
  the device(s)) to the ldisk (representing the local storage status).
3397

3398
  """
3399
  lu.cfg.SetDiskID(dev, node)
3400

    
3401
  result = True
3402

    
3403
  if on_primary or dev.AssembleOnSecondary():
3404
    rstats = lu.rpc.call_blockdev_find(node, dev)
3405
    msg = rstats.fail_msg
3406
    if msg:
3407
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3408
      result = False
3409
    elif not rstats.payload:
3410
      lu.LogWarning("Can't find disk on node %s", node)
3411
      result = False
3412
    else:
3413
      if ldisk:
3414
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3415
      else:
3416
        result = result and not rstats.payload.is_degraded
3417

    
3418
  if dev.children:
3419
    for child in dev.children:
3420
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3421

    
3422
  return result
3423

    
3424

    
3425
class LUOobCommand(NoHooksLU):
3426
  """Logical unit for OOB handling.
3427

3428
  """
3429
  REG_BGL = False
3430
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3431

    
3432
  def CheckPrereq(self):
3433
    """Check prerequisites.
3434

3435
    This checks:
3436
     - the node exists in the configuration
3437
     - OOB is supported
3438

3439
    Any errors are signaled by raising errors.OpPrereqError.
3440

3441
    """
3442
    self.nodes = []
3443
    self.master_node = self.cfg.GetMasterNode()
3444

    
3445
    assert self.op.power_delay >= 0.0
3446

    
3447
    if self.op.node_names:
3448
      if (self.op.command in self._SKIP_MASTER and
3449
          self.master_node in self.op.node_names):
3450
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3451
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3452

    
3453
        if master_oob_handler:
3454
          additional_text = ("run '%s %s %s' if you want to operate on the"
3455
                             " master regardless") % (master_oob_handler,
3456
                                                      self.op.command,
3457
                                                      self.master_node)
3458
        else:
3459
          additional_text = "it does not support out-of-band operations"
3460

    
3461
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3462
                                    " allowed for %s; %s") %
3463
                                   (self.master_node, self.op.command,
3464
                                    additional_text), errors.ECODE_INVAL)
3465
    else:
3466
      self.op.node_names = self.cfg.GetNodeList()
3467
      if self.op.command in self._SKIP_MASTER:
3468
        self.op.node_names.remove(self.master_node)
3469

    
3470
    if self.op.command in self._SKIP_MASTER:
3471
      assert self.master_node not in self.op.node_names
3472

    
3473
    for node_name in self.op.node_names:
3474
      node = self.cfg.GetNodeInfo(node_name)
3475

    
3476
      if node is None:
3477
        raise errors.OpPrereqError("Node %s not found" % node_name,
3478
                                   errors.ECODE_NOENT)
3479
      else:
3480
        self.nodes.append(node)
3481

    
3482
      if (not self.op.ignore_status and
3483
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3484
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3485
                                    " not marked offline") % node_name,
3486
                                   errors.ECODE_STATE)
3487

    
3488
  def ExpandNames(self):
3489
    """Gather locks we need.
3490

3491
    """
3492
    if self.op.node_names:
3493
      self.op.node_names = [_ExpandNodeName(self.cfg, name)
3494
                            for name in self.op.node_names]
3495
      lock_names = self.op.node_names
3496
    else:
3497
      lock_names = locking.ALL_SET
3498

    
3499
    self.needed_locks = {
3500
      locking.LEVEL_NODE: lock_names,
3501
      }
3502

    
3503
  def Exec(self, feedback_fn):
3504
    """Execute OOB and return result if we expect any.
3505

3506
    """
3507
    master_node = self.master_node
3508
    ret = []
3509

    
3510
    for idx, node in enumerate(self.nodes):
3511
      node_entry = [(constants.RS_NORMAL, node.name)]
3512
      ret.append(node_entry)
3513

    
3514
      oob_program = _SupportsOob(self.cfg, node)
3515

    
3516
      if not oob_program:
3517
        node_entry.append((constants.RS_UNAVAIL, None))
3518
        continue
3519

    
3520
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3521
                   self.op.command, oob_program, node.name)
3522
      result = self.rpc.call_run_oob(master_node, oob_program,
3523
                                     self.op.command, node.name,
3524
                                     self.op.timeout)
3525

    
3526
      if result.fail_msg:
3527
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3528
                        node.name, result.fail_msg)
3529
        node_entry.append((constants.RS_NODATA, None))
3530
      else:
3531
        try:
3532
          self._CheckPayload(result)
3533
        except errors.OpExecError, err:
3534
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3535
                          node.name, err)
3536
          node_entry.append((constants.RS_NODATA, None))
3537
        else:
3538
          if self.op.command == constants.OOB_HEALTH:
3539
            # For health we should log important events
3540
            for item, status in result.payload:
3541
              if status in [constants.OOB_STATUS_WARNING,
3542
                            constants.OOB_STATUS_CRITICAL]:
3543
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3544
                                item, node.name, status)
3545

    
3546
          if self.op.command == constants.OOB_POWER_ON:
3547
            node.powered = True
3548
          elif self.op.command == constants.OOB_POWER_OFF:
3549
            node.powered = False
3550
          elif self.op.command == constants.OOB_POWER_STATUS:
3551
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3552
            if powered != node.powered:
3553
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3554
                               " match actual power state (%s)"), node.powered,
3555
                              node.name, powered)
3556

    
3557
          # For configuration changing commands we should update the node
3558
          if self.op.command in (constants.OOB_POWER_ON,
3559
                                 constants.OOB_POWER_OFF):
3560
            self.cfg.Update(node, feedback_fn)
3561

    
3562
          node_entry.append((constants.RS_NORMAL, result.payload))
3563

    
3564
          if (self.op.command == constants.OOB_POWER_ON and
3565
              idx < len(self.nodes) - 1):
3566
            time.sleep(self.op.power_delay)
3567

    
3568
    return ret
3569

    
3570
  def _CheckPayload(self, result):
3571
    """Checks if the payload is valid.
3572

3573
    @param result: RPC result
3574
    @raises errors.OpExecError: If payload is not valid
3575

3576
    """
3577
    errs = []
3578
    if self.op.command == constants.OOB_HEALTH:
3579
      if not isinstance(result.payload, list):
3580
        errs.append("command 'health' is expected to return a list but got %s" %
3581
                    type(result.payload))
3582
      else:
3583
        for item, status in result.payload:
3584
          if status not in constants.OOB_STATUSES:
3585
            errs.append("health item '%s' has invalid status '%s'" %
3586
                        (item, status))
3587

    
3588
    if self.op.command == constants.OOB_POWER_STATUS:
3589
      if not isinstance(result.payload, dict):
3590
        errs.append("power-status is expected to return a dict but got %s" %
3591
                    type(result.payload))
3592

    
3593
    if self.op.command in [
3594
        constants.OOB_POWER_ON,
3595
        constants.OOB_POWER_OFF,
3596
        constants.OOB_POWER_CYCLE,
3597
        ]:
3598
      if result.payload is not None:
3599
        errs.append("%s is expected to not return payload but got '%s'" %
3600
                    (self.op.command, result.payload))
3601

    
3602
    if errs:
3603
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3604
                               utils.CommaJoin(errs))
3605

    
3606
class _OsQuery(_QueryBase):
3607
  FIELDS = query.OS_FIELDS
3608

    
3609
  def ExpandNames(self, lu):
3610
    # Lock all nodes in shared mode
3611
    # Temporary removal of locks, should be reverted later
3612
    # TODO: reintroduce locks when they are lighter-weight
3613
    lu.needed_locks = {}
3614
    #self.share_locks[locking.LEVEL_NODE] = 1
3615
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3616

    
3617
    # The following variables interact with _QueryBase._GetNames
3618
    if self.names:
3619
      self.wanted = self.names
3620
    else:
3621
      self.wanted = locking.ALL_SET
3622

    
3623
    self.do_locking = self.use_locking
3624

    
3625
  def DeclareLocks(self, lu, level):
3626
    pass
3627

    
3628
  @staticmethod
3629
  def _DiagnoseByOS(rlist):
3630
    """Remaps a per-node return list into an a per-os per-node dictionary
3631

3632
    @param rlist: a map with node names as keys and OS objects as values
3633

3634
    @rtype: dict
3635
    @return: a dictionary with osnames as keys and as value another
3636
        map, with nodes as keys and tuples of (path, status, diagnose,
3637
        variants, parameters, api_versions) as values, eg::
3638

3639
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3640
                                     (/srv/..., False, "invalid api")],
3641
                           "node2": [(/srv/..., True, "", [], [])]}
3642
          }
3643

3644
    """
3645
    all_os = {}
3646
    # we build here the list of nodes that didn't fail the RPC (at RPC
3647
    # level), so that nodes with a non-responding node daemon don't
3648
    # make all OSes invalid
3649
    good_nodes = [node_name for node_name in rlist
3650
                  if not rlist[node_name].fail_msg]
3651
    for node_name, nr in rlist.items():
3652
      if nr.fail_msg or not nr.payload:
3653
        continue
3654
      for (name, path, status, diagnose, variants,
3655
           params, api_versions) in nr.payload:
3656
        if name not in all_os:
3657
          # build a list of nodes for this os containing empty lists
3658
          # for each node in node_list
3659
          all_os[name] = {}
3660
          for nname in good_nodes:
3661
            all_os[name][nname] = []
3662
        # convert params from [name, help] to (name, help)
3663
        params = [tuple(v) for v in params]
3664
        all_os[name][node_name].append((path, status, diagnose,
3665
                                        variants, params, api_versions))
3666
    return all_os
3667

    
3668
  def _GetQueryData(self, lu):
3669
    """Computes the list of nodes and their attributes.
3670

3671
    """
3672
    # Locking is not used
3673
    assert not (compat.any(lu.glm.is_owned(level)
3674
                           for level in locking.LEVELS) or
3675
                self.do_locking or self.use_locking)
3676

    
3677
    valid_nodes = [node.name
3678
                   for node in lu.cfg.GetAllNodesInfo().values()
3679
                   if not node.offline and node.vm_capable]
3680
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3681
    cluster = lu.cfg.GetClusterInfo()
3682

    
3683
    data = {}
3684

    
3685
    for (os_name, os_data) in pol.items():
3686
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3687
                          hidden=(os_name in cluster.hidden_os),
3688
                          blacklisted=(os_name in cluster.blacklisted_os))
3689

    
3690
      variants = set()
3691
      parameters = set()
3692
      api_versions = set()
3693

    
3694
      for idx, osl in enumerate(os_data.values()):
3695
        info.valid = bool(info.valid and osl and osl[0][1])
3696
        if not info.valid:
3697
          break
3698

    
3699
        (node_variants, node_params, node_api) = osl[0][3:6]
3700
        if idx == 0:
3701
          # First entry
3702
          variants.update(node_variants)
3703
          parameters.update(node_params)
3704
          api_versions.update(node_api)
3705
        else:
3706
          # Filter out inconsistent values
3707
          variants.intersection_update(node_variants)
3708
          parameters.intersection_update(node_params)
3709
          api_versions.intersection_update(node_api)
3710

    
3711
      info.variants = list(variants)
3712
      info.parameters = list(parameters)
3713
      info.api_versions = list(api_versions)
3714

    
3715
      data[os_name] = info
3716

    
3717
    # Prepare data in requested order
3718
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3719
            if name in data]
3720

    
3721

    
3722
class LUOsDiagnose(NoHooksLU):
3723
  """Logical unit for OS diagnose/query.
3724

3725
  """
3726
  REQ_BGL = False
3727

    
3728
  @staticmethod
3729
  def _BuildFilter(fields, names):
3730
    """Builds a filter for querying OSes.
3731

3732
    """
3733
    name_filter = qlang.MakeSimpleFilter("name", names)
3734

    
3735
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3736
    # respective field is not requested
3737
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3738
                     for fname in ["hidden", "blacklisted"]
3739
                     if fname not in fields]
3740
    if "valid" not in fields:
3741
      status_filter.append([qlang.OP_TRUE, "valid"])
3742

    
3743
    if status_filter:
3744
      status_filter.insert(0, qlang.OP_AND)
3745
    else:
3746
      status_filter = None
3747

    
3748
    if name_filter and status_filter:
3749
      return [qlang.OP_AND, name_filter, status_filter]
3750
    elif name_filter:
3751
      return name_filter
3752
    else:
3753
      return status_filter
3754

    
3755
  def CheckArguments(self):
3756
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3757
                       self.op.output_fields, False)
3758

    
3759
  def ExpandNames(self):
3760
    self.oq.ExpandNames(self)
3761

    
3762
  def Exec(self, feedback_fn):
3763
    return self.oq.OldStyleQuery(self)
3764

    
3765

    
3766
class LUNodeRemove(LogicalUnit):
3767
  """Logical unit for removing a node.
3768

3769
  """
3770
  HPATH = "node-remove"
3771
  HTYPE = constants.HTYPE_NODE
3772

    
3773
  def BuildHooksEnv(self):
3774
    """Build hooks env.
3775

3776
    This doesn't run on the target node in the pre phase as a failed
3777
    node would then be impossible to remove.
3778

3779
    """
3780
    return {
3781
      "OP_TARGET": self.op.node_name,
3782
      "NODE_NAME": self.op.node_name,
3783
      }
3784

    
3785
  def BuildHooksNodes(self):
3786
    """Build hooks nodes.
3787

3788
    """
3789
    all_nodes = self.cfg.GetNodeList()
3790
    try:
3791
      all_nodes.remove(self.op.node_name)
3792
    except ValueError:
3793
      logging.warning("Node '%s', which is about to be removed, was not found"
3794
                      " in the list of all nodes", self.op.node_name)
3795
    return (all_nodes, all_nodes)
3796

    
3797
  def CheckPrereq(self):
3798
    """Check prerequisites.
3799

3800
    This checks:
3801
     - the node exists in the configuration
3802
     - it does not have primary or secondary instances
3803
     - it's not the master
3804

3805
    Any errors are signaled by raising errors.OpPrereqError.
3806

3807
    """
3808
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3809
    node = self.cfg.GetNodeInfo(self.op.node_name)
3810
    assert node is not None
3811

    
3812
    instance_list = self.cfg.GetInstanceList()
3813

    
3814
    masternode = self.cfg.GetMasterNode()
3815
    if node.name == masternode:
3816
      raise errors.OpPrereqError("Node is the master node, failover to another"
3817
                                 " node is required", errors.ECODE_INVAL)
3818

    
3819
    for instance_name in instance_list:
3820
      instance = self.cfg.GetInstanceInfo(instance_name)
3821
      if node.name in instance.all_nodes:
3822
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3823
                                   " please remove first" % instance_name,
3824
                                   errors.ECODE_INVAL)
3825
    self.op.node_name = node.name
3826
    self.node = node
3827

    
3828
  def Exec(self, feedback_fn):
3829
    """Removes the node from the cluster.
3830

3831
    """
3832
    node = self.node
3833
    logging.info("Stopping the node daemon and removing configs from node %s",
3834
                 node.name)
3835

    
3836
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3837

    
3838
    # Promote nodes to master candidate as needed
3839
    _AdjustCandidatePool(self, exceptions=[node.name])
3840
    self.context.RemoveNode(node.name)
3841

    
3842
    # Run post hooks on the node before it's removed
3843
    _RunPostHook(self, node.name)
3844

    
3845
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3846
    msg = result.fail_msg
3847
    if msg:
3848
      self.LogWarning("Errors encountered on the remote node while leaving"
3849
                      " the cluster: %s", msg)
3850

    
3851
    # Remove node from our /etc/hosts
3852
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3853
      master_node = self.cfg.GetMasterNode()
3854
      result = self.rpc.call_etc_hosts_modify(master_node,
3855
                                              constants.ETC_HOSTS_REMOVE,
3856
                                              node.name, None)
3857
      result.Raise("Can't update hosts file with new host data")
3858
      _RedistributeAncillaryFiles(self)
3859

    
3860

    
3861
class _NodeQuery(_QueryBase):
3862
  FIELDS = query.NODE_FIELDS
3863

    
3864
  def ExpandNames(self, lu):
3865
    lu.needed_locks = {}
3866
    lu.share_locks[locking.LEVEL_NODE] = 1
3867

    
3868
    if self.names:
3869
      self.wanted = _GetWantedNodes(lu, self.names)
3870
    else:
3871
      self.wanted = locking.ALL_SET
3872

    
3873
    self.do_locking = (self.use_locking and
3874
                       query.NQ_LIVE in self.requested_data)
3875

    
3876
    if self.do_locking:
3877
      # if we don't request only static fields, we need to lock the nodes
3878
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3879

    
3880
  def DeclareLocks(self, lu, level):
3881
    pass
3882

    
3883
  def _GetQueryData(self, lu):
3884
    """Computes the list of nodes and their attributes.
3885

3886
    """
3887
    all_info = lu.cfg.GetAllNodesInfo()
3888

    
3889
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3890

    
3891
    # Gather data as requested
3892
    if query.NQ_LIVE in self.requested_data:
3893
      # filter out non-vm_capable nodes
3894
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3895

    
3896
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3897
                                        lu.cfg.GetHypervisorType())
3898
      live_data = dict((name, nresult.payload)
3899
                       for (name, nresult) in node_data.items()
3900
                       if not nresult.fail_msg and nresult.payload)
3901
    else:
3902
      live_data = None
3903

    
3904
    if query.NQ_INST in self.requested_data:
3905
      node_to_primary = dict([(name, set()) for name in nodenames])
3906
      node_to_secondary = dict([(name, set()) for name in nodenames])
3907

    
3908
      inst_data = lu.cfg.GetAllInstancesInfo()
3909

    
3910
      for inst in inst_data.values():
3911
        if inst.primary_node in node_to_primary:
3912
          node_to_primary[inst.primary_node].add(inst.name)
3913
        for secnode in inst.secondary_nodes:
3914
          if secnode in node_to_secondary:
3915
            node_to_secondary[secnode].add(inst.name)
3916
    else:
3917
      node_to_primary = None
3918
      node_to_secondary = None
3919

    
3920
    if query.NQ_OOB in self.requested_data:
3921
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3922
                         for name, node in all_info.iteritems())
3923
    else:
3924
      oob_support = None
3925

    
3926
    if query.NQ_GROUP in self.requested_data:
3927
      groups = lu.cfg.GetAllNodeGroupsInfo()
3928
    else:
3929
      groups = {}
3930

    
3931
    return query.NodeQueryData([all_info[name] for name in nodenames],
3932
                               live_data, lu.cfg.GetMasterNode(),
3933
                               node_to_primary, node_to_secondary, groups,
3934
                               oob_support, lu.cfg.GetClusterInfo())
3935

    
3936

    
3937
class LUNodeQuery(NoHooksLU):
3938
  """Logical unit for querying nodes.
3939

3940
  """
3941
  # pylint: disable-msg=W0142
3942
  REQ_BGL = False
3943

    
3944
  def CheckArguments(self):
3945
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3946
                         self.op.output_fields, self.op.use_locking)
3947

    
3948
  def ExpandNames(self):
3949
    self.nq.ExpandNames(self)
3950

    
3951
  def Exec(self, feedback_fn):
3952
    return self.nq.OldStyleQuery(self)
3953

    
3954

    
3955
class LUNodeQueryvols(NoHooksLU):
3956
  """Logical unit for getting volumes on node(s).
3957

3958
  """
3959
  REQ_BGL = False
3960
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3961
  _FIELDS_STATIC = utils.FieldSet("node")
3962

    
3963
  def CheckArguments(self):
3964
    _CheckOutputFields(static=self._FIELDS_STATIC,
3965
                       dynamic=self._FIELDS_DYNAMIC,
3966
                       selected=self.op.output_fields)
3967

    
3968
  def ExpandNames(self):
3969
    self.needed_locks = {}
3970
    self.share_locks[locking.LEVEL_NODE] = 1
3971
    if not self.op.nodes:
3972
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3973
    else:
3974
      self.needed_locks[locking.LEVEL_NODE] = \
3975
        _GetWantedNodes(self, self.op.nodes)
3976

    
3977
  def Exec(self, feedback_fn):
3978
    """Computes the list of nodes and their attributes.
3979

3980
    """
3981
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
3982
    volumes = self.rpc.call_node_volumes(nodenames)
3983

    
3984
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3985
             in self.cfg.GetInstanceList()]
3986

    
3987
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3988

    
3989
    output = []
3990
    for node in nodenames:
3991
      nresult = volumes[node]
3992
      if nresult.offline:
3993
        continue
3994
      msg = nresult.fail_msg
3995
      if msg:
3996
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3997
        continue
3998

    
3999
      node_vols = nresult.payload[:]
4000
      node_vols.sort(key=lambda vol: vol['dev'])
4001

    
4002
      for vol in node_vols:
4003
        node_output = []
4004
        for field in self.op.output_fields:
4005
          if field == "node":
4006
            val = node
4007
          elif field == "phys":
4008
            val = vol['dev']
4009
          elif field == "vg":
4010
            val = vol['vg']
4011
          elif field == "name":
4012
            val = vol['name']
4013
          elif field == "size":
4014
            val = int(float(vol['size']))
4015
          elif field == "instance":
4016
            for inst in ilist:
4017
              if node not in lv_by_node[inst]:
4018
                continue
4019
              if vol['name'] in lv_by_node[inst][node]:
4020
                val = inst.name
4021
                break
4022
            else:
4023
              val = '-'
4024
          else:
4025
            raise errors.ParameterError(field)
4026
          node_output.append(str(val))
4027

    
4028
        output.append(node_output)
4029

    
4030
    return output
4031

    
4032

    
4033
class LUNodeQueryStorage(NoHooksLU):
4034
  """Logical unit for getting information on storage units on node(s).
4035

4036
  """
4037
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4038
  REQ_BGL = False
4039

    
4040
  def CheckArguments(self):
4041
    _CheckOutputFields(static=self._FIELDS_STATIC,
4042
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4043
                       selected=self.op.output_fields)
4044

    
4045
  def ExpandNames(self):
4046
    self.needed_locks = {}
4047
    self.share_locks[locking.LEVEL_NODE] = 1
4048

    
4049
    if self.op.nodes:
4050
      self.needed_locks[locking.LEVEL_NODE] = \
4051
        _GetWantedNodes(self, self.op.nodes)
4052
    else:
4053
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4054

    
4055
  def Exec(self, feedback_fn):
4056
    """Computes the list of nodes and their attributes.
4057

4058
    """
4059
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4060

    
4061
    # Always get name to sort by
4062
    if constants.SF_NAME in self.op.output_fields:
4063
      fields = self.op.output_fields[:]
4064
    else:
4065
      fields = [constants.SF_NAME] + self.op.output_fields
4066

    
4067
    # Never ask for node or type as it's only known to the LU
4068
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4069
      while extra in fields:
4070
        fields.remove(extra)
4071

    
4072
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4073
    name_idx = field_idx[constants.SF_NAME]
4074

    
4075
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4076
    data = self.rpc.call_storage_list(self.nodes,
4077
                                      self.op.storage_type, st_args,
4078
                                      self.op.name, fields)
4079

    
4080
    result = []
4081

    
4082
    for node in utils.NiceSort(self.nodes):
4083
      nresult = data[node]
4084
      if nresult.offline:
4085
        continue
4086

    
4087
      msg = nresult.fail_msg
4088
      if msg:
4089
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4090
        continue
4091

    
4092
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4093

    
4094
      for name in utils.NiceSort(rows.keys()):
4095
        row = rows[name]
4096

    
4097
        out = []
4098

    
4099
        for field in self.op.output_fields:
4100
          if field == constants.SF_NODE:
4101
            val = node
4102
          elif field == constants.SF_TYPE:
4103
            val = self.op.storage_type
4104
          elif field in field_idx:
4105
            val = row[field_idx[field]]
4106
          else:
4107
            raise errors.ParameterError(field)
4108

    
4109
          out.append(val)
4110

    
4111
        result.append(out)
4112

    
4113
    return result
4114

    
4115

    
4116
class _InstanceQuery(_QueryBase):
4117
  FIELDS = query.INSTANCE_FIELDS
4118

    
4119
  def ExpandNames(self, lu):
4120
    lu.needed_locks = {}
4121
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4122
    lu.share_locks[locking.LEVEL_NODE] = 1
4123

    
4124
    if self.names:
4125
      self.wanted = _GetWantedInstances(lu, self.names)
4126
    else:
4127
      self.wanted = locking.ALL_SET
4128

    
4129
    self.do_locking = (self.use_locking and
4130
                       query.IQ_LIVE in self.requested_data)
4131
    if self.do_locking:
4132
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4133
      lu.needed_locks[locking.LEVEL_NODE] = []
4134
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4135

    
4136
  def DeclareLocks(self, lu, level):
4137
    if level == locking.LEVEL_NODE and self.do_locking:
4138
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4139

    
4140
  def _GetQueryData(self, lu):
4141
    """Computes the list of instances and their attributes.
4142

4143
    """
4144
    cluster = lu.cfg.GetClusterInfo()
4145
    all_info = lu.cfg.GetAllInstancesInfo()
4146

    
4147
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4148

    
4149
    instance_list = [all_info[name] for name in instance_names]
4150
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4151
                                        for inst in instance_list)))
4152
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4153
    bad_nodes = []
4154
    offline_nodes = []
4155
    wrongnode_inst = set()
4156

    
4157
    # Gather data as requested
4158
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4159
      live_data = {}
4160
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4161
      for name in nodes:
4162
        result = node_data[name]
4163
        if result.offline:
4164
          # offline nodes will be in both lists
4165
          assert result.fail_msg
4166
          offline_nodes.append(name)
4167
        if result.fail_msg:
4168
          bad_nodes.append(name)
4169
        elif result.payload:
4170
          for inst in result.payload:
4171
            if inst in all_info:
4172
              if all_info[inst].primary_node == name:
4173
                live_data.update(result.payload)
4174
              else:
4175
                wrongnode_inst.add(inst)
4176
            else:
4177
              # orphan instance; we don't list it here as we don't
4178
              # handle this case yet in the output of instance listing
4179
              logging.warning("Orphan instance '%s' found on node %s",
4180
                              inst, name)
4181
        # else no instance is alive
4182
    else:
4183
      live_data = {}
4184

    
4185
    if query.IQ_DISKUSAGE in self.requested_data:
4186
      disk_usage = dict((inst.name,
4187
                         _ComputeDiskSize(inst.disk_template,
4188
                                          [{constants.IDISK_SIZE: disk.size}
4189
                                           for disk in inst.disks]))
4190
                        for inst in instance_list)
4191
    else:
4192
      disk_usage = None
4193

    
4194
    if query.IQ_CONSOLE in self.requested_data:
4195
      consinfo = {}
4196
      for inst in instance_list:
4197
        if inst.name in live_data:
4198
          # Instance is running
4199
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4200
        else:
4201
          consinfo[inst.name] = None
4202
      assert set(consinfo.keys()) == set(instance_names)
4203
    else:
4204
      consinfo = None
4205

    
4206
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4207
                                   disk_usage, offline_nodes, bad_nodes,
4208
                                   live_data, wrongnode_inst, consinfo)
4209

    
4210

    
4211
class LUQuery(NoHooksLU):
4212
  """Query for resources/items of a certain kind.
4213

4214
  """
4215
  # pylint: disable-msg=W0142
4216
  REQ_BGL = False
4217

    
4218
  def CheckArguments(self):
4219
    qcls = _GetQueryImplementation(self.op.what)
4220

    
4221
    self.impl = qcls(self.op.filter, self.op.fields, False)
4222

    
4223
  def ExpandNames(self):
4224
    self.impl.ExpandNames(self)
4225

    
4226
  def DeclareLocks(self, level):
4227
    self.impl.DeclareLocks(self, level)
4228

    
4229
  def Exec(self, feedback_fn):
4230
    return self.impl.NewStyleQuery(self)
4231

    
4232

    
4233
class LUQueryFields(NoHooksLU):
4234
  """Query for resources/items of a certain kind.
4235

4236
  """
4237
  # pylint: disable-msg=W0142
4238
  REQ_BGL = False
4239

    
4240
  def CheckArguments(self):
4241
    self.qcls = _GetQueryImplementation(self.op.what)
4242

    
4243
  def ExpandNames(self):
4244
    self.needed_locks = {}
4245

    
4246
  def Exec(self, feedback_fn):
4247
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4248

    
4249

    
4250
class LUNodeModifyStorage(NoHooksLU):
4251
  """Logical unit for modifying a storage volume on a node.
4252

4253
  """
4254
  REQ_BGL = False
4255

    
4256
  def CheckArguments(self):
4257
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4258

    
4259
    storage_type = self.op.storage_type
4260

    
4261
    try:
4262
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4263
    except KeyError:
4264
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4265
                                 " modified" % storage_type,
4266
                                 errors.ECODE_INVAL)
4267

    
4268
    diff = set(self.op.changes.keys()) - modifiable
4269
    if diff:
4270
      raise errors.OpPrereqError("The following fields can not be modified for"
4271
                                 " storage units of type '%s': %r" %
4272
                                 (storage_type, list(diff)),
4273
                                 errors.ECODE_INVAL)
4274

    
4275
  def ExpandNames(self):
4276
    self.needed_locks = {
4277
      locking.LEVEL_NODE: self.op.node_name,
4278
      }
4279

    
4280
  def Exec(self, feedback_fn):
4281
    """Computes the list of nodes and their attributes.
4282

4283
    """
4284
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4285
    result = self.rpc.call_storage_modify(self.op.node_name,
4286
                                          self.op.storage_type, st_args,
4287
                                          self.op.name, self.op.changes)
4288
    result.Raise("Failed to modify storage unit '%s' on %s" %
4289
                 (self.op.name, self.op.node_name))
4290

    
4291

    
4292
class LUNodeAdd(LogicalUnit):
4293
  """Logical unit for adding node to the cluster.
4294

4295
  """
4296
  HPATH = "node-add"
4297
  HTYPE = constants.HTYPE_NODE
4298
  _NFLAGS = ["master_capable", "vm_capable"]
4299

    
4300
  def CheckArguments(self):
4301
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4302
    # validate/normalize the node name
4303
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4304
                                         family=self.primary_ip_family)
4305
    self.op.node_name = self.hostname.name
4306

    
4307
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4308
      raise errors.OpPrereqError("Cannot readd the master node",
4309
                                 errors.ECODE_STATE)
4310

    
4311
    if self.op.readd and self.op.group:
4312
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4313
                                 " being readded", errors.ECODE_INVAL)
4314

    
4315
  def BuildHooksEnv(self):
4316
    """Build hooks env.
4317

4318
    This will run on all nodes before, and on all nodes + the new node after.
4319

4320
    """
4321
    return {
4322
      "OP_TARGET": self.op.node_name,
4323
      "NODE_NAME": self.op.node_name,
4324
      "NODE_PIP": self.op.primary_ip,
4325
      "NODE_SIP": self.op.secondary_ip,
4326
      "MASTER_CAPABLE": str(self.op.master_capable),
4327
      "VM_CAPABLE": str(self.op.vm_capable),
4328
      }
4329

    
4330
  def BuildHooksNodes(self):
4331
    """Build hooks nodes.
4332

4333
    """
4334
    # Exclude added node
4335
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4336
    post_nodes = pre_nodes + [self.op.node_name, ]
4337

    
4338
    return (pre_nodes, post_nodes)
4339

    
4340
  def CheckPrereq(self):
4341
    """Check prerequisites.
4342

4343
    This checks:
4344
     - the new node is not already in the config
4345
     - it is resolvable
4346
     - its parameters (single/dual homed) matches the cluster
4347

4348
    Any errors are signaled by raising errors.OpPrereqError.
4349

4350
    """
4351
    cfg = self.cfg
4352
    hostname = self.hostname
4353
    node = hostname.name
4354
    primary_ip = self.op.primary_ip = hostname.ip
4355
    if self.op.secondary_ip is None:
4356
      if self.primary_ip_family == netutils.IP6Address.family:
4357
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4358
                                   " IPv4 address must be given as secondary",
4359
                                   errors.ECODE_INVAL)
4360
      self.op.secondary_ip = primary_ip
4361

    
4362
    secondary_ip = self.op.secondary_ip
4363
    if not netutils.IP4Address.IsValid(secondary_ip):
4364
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4365
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4366

    
4367
    node_list = cfg.GetNodeList()
4368
    if not self.op.readd and node in node_list:
4369
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4370
                                 node, errors.ECODE_EXISTS)
4371
    elif self.op.readd and node not in node_list:
4372
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4373
                                 errors.ECODE_NOENT)
4374

    
4375
    self.changed_primary_ip = False
4376

    
4377
    for existing_node_name in node_list:
4378
      existing_node = cfg.GetNodeInfo(existing_node_name)
4379

    
4380
      if self.op.readd and node == existing_node_name:
4381
        if existing_node.secondary_ip != secondary_ip:
4382
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4383
                                     " address configuration as before",
4384
                                     errors.ECODE_INVAL)
4385
        if existing_node.primary_ip != primary_ip:
4386
          self.changed_primary_ip = True
4387

    
4388
        continue
4389

    
4390
      if (existing_node.primary_ip == primary_ip or
4391
          existing_node.secondary_ip == primary_ip or
4392
          existing_node.primary_ip == secondary_ip or
4393
          existing_node.secondary_ip == secondary_ip):
4394
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4395
                                   " existing node %s" % existing_node.name,
4396
                                   errors.ECODE_NOTUNIQUE)
4397

    
4398
    # After this 'if' block, None is no longer a valid value for the
4399
    # _capable op attributes
4400
    if self.op.readd:
4401
      old_node = self.cfg.GetNodeInfo(node)
4402
      assert old_node is not None, "Can't retrieve locked node %s" % node
4403
      for attr in self._NFLAGS:
4404
        if getattr(self.op, attr) is None:
4405
          setattr(self.op, attr, getattr(old_node, attr))
4406
    else:
4407
      for attr in self._NFLAGS:
4408
        if getattr(self.op, attr) is None:
4409
          setattr(self.op, attr, True)
4410

    
4411
    if self.op.readd and not self.op.vm_capable:
4412
      pri, sec = cfg.GetNodeInstances(node)
4413
      if pri or sec:
4414
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4415
                                   " flag set to false, but it already holds"
4416
                                   " instances" % node,
4417
                                   errors.ECODE_STATE)
4418

    
4419
    # check that the type of the node (single versus dual homed) is the
4420
    # same as for the master
4421
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4422
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4423
    newbie_singlehomed = secondary_ip == primary_ip
4424
    if master_singlehomed != newbie_singlehomed:
4425
      if master_singlehomed:
4426
        raise errors.OpPrereqError("The master has no secondary ip but the"
4427
                                   " new node has one",
4428
                                   errors.ECODE_INVAL)
4429
      else:
4430
        raise errors.OpPrereqError("The master has a secondary ip but the"
4431
                                   " new node doesn't have one",
4432
                                   errors.ECODE_INVAL)
4433

    
4434
    # checks reachability
4435
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4436
      raise errors.OpPrereqError("Node not reachable by ping",
4437
                                 errors.ECODE_ENVIRON)
4438

    
4439
    if not newbie_singlehomed:
4440
      # check reachability from my secondary ip to newbie's secondary ip
4441
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4442
                           source=myself.secondary_ip):
4443
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4444
                                   " based ping to node daemon port",
4445
                                   errors.ECODE_ENVIRON)
4446

    
4447
    if self.op.readd:
4448
      exceptions = [node]
4449
    else:
4450
      exceptions = []
4451

    
4452
    if self.op.master_capable:
4453
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4454
    else:
4455
      self.master_candidate = False
4456

    
4457
    if self.op.readd:
4458
      self.new_node = old_node
4459
    else:
4460
      node_group = cfg.LookupNodeGroup(self.op.group)
4461
      self.new_node = objects.Node(name=node,
4462
                                   primary_ip=primary_ip,
4463
                                   secondary_ip=secondary_ip,
4464
                                   master_candidate=self.master_candidate,
4465
                                   offline=False, drained=False,
4466
                                   group=node_group)
4467

    
4468
    if self.op.ndparams:
4469
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4470

    
4471
  def Exec(self, feedback_fn):
4472
    """Adds the new node to the cluster.
4473

4474
    """
4475
    new_node = self.new_node
4476
    node = new_node.name
4477

    
4478
    # We adding a new node so we assume it's powered
4479
    new_node.powered = True
4480

    
4481
    # for re-adds, reset the offline/drained/master-candidate flags;
4482
    # we need to reset here, otherwise offline would prevent RPC calls
4483
    # later in the procedure; this also means that if the re-add
4484
    # fails, we are left with a non-offlined, broken node
4485
    if self.op.readd:
4486
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4487
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4488
      # if we demote the node, we do cleanup later in the procedure
4489
      new_node.master_candidate = self.master_candidate
4490
      if self.changed_primary_ip:
4491
        new_node.primary_ip = self.op.primary_ip
4492

    
4493
    # copy the master/vm_capable flags
4494
    for attr in self._NFLAGS:
4495
      setattr(new_node, attr, getattr(self.op, attr))
4496

    
4497
    # notify the user about any possible mc promotion
4498
    if new_node.master_candidate:
4499
      self.LogInfo("Node will be a master candidate")
4500

    
4501
    if self.op.ndparams:
4502
      new_node.ndparams = self.op.ndparams
4503
    else:
4504
      new_node.ndparams = {}
4505

    
4506
    # check connectivity
4507
    result = self.rpc.call_version([node])[node]
4508
    result.Raise("Can't get version information from node %s" % node)
4509
    if constants.PROTOCOL_VERSION == result.payload:
4510
      logging.info("Communication to node %s fine, sw version %s match",
4511
                   node, result.payload)
4512
    else:
4513
      raise errors.OpExecError("Version mismatch master version %s,"
4514
                               " node version %s" %
4515
                               (constants.PROTOCOL_VERSION, result.payload))
4516

    
4517
    # Add node to our /etc/hosts, and add key to known_hosts
4518
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4519
      master_node = self.cfg.GetMasterNode()
4520
      result = self.rpc.call_etc_hosts_modify(master_node,
4521
                                              constants.ETC_HOSTS_ADD,
4522
                                              self.hostname.name,
4523
                                              self.hostname.ip)
4524
      result.Raise("Can't update hosts file with new host data")
4525

    
4526
    if new_node.secondary_ip != new_node.primary_ip:
4527
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4528
                               False)
4529

    
4530
    node_verify_list = [self.cfg.GetMasterNode()]
4531
    node_verify_param = {
4532
      constants.NV_NODELIST: [node],
4533
      # TODO: do a node-net-test as well?
4534
    }
4535

    
4536
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4537
                                       self.cfg.GetClusterName())
4538
    for verifier in node_verify_list:
4539
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4540
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4541
      if nl_payload:
4542
        for failed in nl_payload:
4543
          feedback_fn("ssh/hostname verification failed"
4544
                      " (checking from %s): %s" %
4545
                      (verifier, nl_payload[failed]))
4546
        raise errors.OpExecError("ssh/hostname verification failed")
4547

    
4548
    if self.op.readd:
4549
      _RedistributeAncillaryFiles(self)
4550
      self.context.ReaddNode(new_node)
4551
      # make sure we redistribute the config
4552
      self.cfg.Update(new_node, feedback_fn)
4553
      # and make sure the new node will not have old files around
4554
      if not new_node.master_candidate:
4555
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4556
        msg = result.fail_msg
4557
        if msg:
4558
          self.LogWarning("Node failed to demote itself from master"
4559
                          " candidate status: %s" % msg)
4560
    else:
4561
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4562
                                  additional_vm=self.op.vm_capable)
4563
      self.context.AddNode(new_node, self.proc.GetECId())
4564

    
4565

    
4566
class LUNodeSetParams(LogicalUnit):
4567
  """Modifies the parameters of a node.
4568

4569
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4570
      to the node role (as _ROLE_*)
4571
  @cvar _R2F: a dictionary from node role to tuples of flags
4572
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4573

4574
  """
4575
  HPATH = "node-modify"
4576
  HTYPE = constants.HTYPE_NODE
4577
  REQ_BGL = False
4578
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4579
  _F2R = {
4580
    (True, False, False): _ROLE_CANDIDATE,
4581
    (False, True, False): _ROLE_DRAINED,
4582
    (False, False, True): _ROLE_OFFLINE,
4583
    (False, False, False): _ROLE_REGULAR,
4584
    }
4585
  _R2F = dict((v, k) for k, v in _F2R.items())
4586
  _FLAGS = ["master_candidate", "drained", "offline"]
4587

    
4588
  def CheckArguments(self):
4589
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4590
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4591
                self.op.master_capable, self.op.vm_capable,
4592
                self.op.secondary_ip, self.op.ndparams]
4593
    if all_mods.count(None) == len(all_mods):
4594
      raise errors.OpPrereqError("Please pass at least one modification",
4595
                                 errors.ECODE_INVAL)
4596
    if all_mods.count(True) > 1:
4597
      raise errors.OpPrereqError("Can't set the node into more than one"
4598
                                 " state at the same time",
4599
                                 errors.ECODE_INVAL)
4600

    
4601
    # Boolean value that tells us whether we might be demoting from MC
4602
    self.might_demote = (self.op.master_candidate == False or
4603
                         self.op.offline == True or
4604
                         self.op.drained == True or
4605
                         self.op.master_capable == False)
4606

    
4607
    if self.op.secondary_ip:
4608
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4609
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4610
                                   " address" % self.op.secondary_ip,
4611
                                   errors.ECODE_INVAL)
4612

    
4613
    self.lock_all = self.op.auto_promote and self.might_demote
4614
    self.lock_instances = self.op.secondary_ip is not None
4615

    
4616
  def ExpandNames(self):
4617
    if self.lock_all:
4618
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4619
    else:
4620
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4621

    
4622
    if self.lock_instances:
4623
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4624

    
4625
  def DeclareLocks(self, level):
4626
    # If we have locked all instances, before waiting to lock nodes, release
4627
    # all the ones living on nodes unrelated to the current operation.
4628
    if level == locking.LEVEL_NODE and self.lock_instances:
4629
      self.affected_instances = []
4630
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4631
        instances_keep = []
4632

    
4633
        # Build list of instances to release
4634
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4635
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4636
          if (instance.disk_template in constants.DTS_INT_MIRROR and
4637
              self.op.node_name in instance.all_nodes):
4638
            instances_keep.append(instance_name)
4639
            self.affected_instances.append(instance)
4640

    
4641
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4642

    
4643
        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4644
                set(instances_keep))
4645

    
4646
  def BuildHooksEnv(self):
4647
    """Build hooks env.
4648

4649
    This runs on the master node.
4650

4651
    """
4652
    return {
4653
      "OP_TARGET": self.op.node_name,
4654
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4655
      "OFFLINE": str(self.op.offline),
4656
      "DRAINED": str(self.op.drained),
4657
      "MASTER_CAPABLE": str(self.op.master_capable),
4658
      "VM_CAPABLE": str(self.op.vm_capable),
4659
      }
4660

    
4661
  def BuildHooksNodes(self):
4662
    """Build hooks nodes.
4663

4664
    """
4665
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
4666
    return (nl, nl)
4667

    
4668
  def CheckPrereq(self):
4669
    """Check prerequisites.
4670

4671
    This only checks the instance list against the existing names.
4672

4673
    """
4674
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4675

    
4676
    if (self.op.master_candidate is not None or
4677
        self.op.drained is not None or
4678
        self.op.offline is not None):
4679
      # we can't change the master's node flags
4680
      if self.op.node_name == self.cfg.GetMasterNode():
4681
        raise errors.OpPrereqError("The master role can be changed"
4682
                                   " only via master-failover",
4683
                                   errors.ECODE_INVAL)
4684

    
4685
    if self.op.master_candidate and not node.master_capable:
4686
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4687
                                 " it a master candidate" % node.name,
4688
                                 errors.ECODE_STATE)
4689

    
4690
    if self.op.vm_capable == False:
4691
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4692
      if ipri or isec:
4693
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4694
                                   " the vm_capable flag" % node.name,
4695
                                   errors.ECODE_STATE)
4696

    
4697
    if node.master_candidate and self.might_demote and not self.lock_all:
4698
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
4699
      # check if after removing the current node, we're missing master
4700
      # candidates
4701
      (mc_remaining, mc_should, _) = \
4702
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4703
      if mc_remaining < mc_should:
4704
        raise errors.OpPrereqError("Not enough master candidates, please"
4705
                                   " pass auto promote option to allow"
4706
                                   " promotion", errors.ECODE_STATE)
4707

    
4708
    self.old_flags = old_flags = (node.master_candidate,
4709
                                  node.drained, node.offline)
4710
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4711
    self.old_role = old_role = self._F2R[old_flags]
4712

    
4713
    # Check for ineffective changes
4714
    for attr in self._FLAGS:
4715
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4716
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4717
        setattr(self.op, attr, None)
4718

    
4719
    # Past this point, any flag change to False means a transition
4720
    # away from the respective state, as only real changes are kept
4721

    
4722
    # TODO: We might query the real power state if it supports OOB
4723
    if _SupportsOob(self.cfg, node):
4724
      if self.op.offline is False and not (node.powered or
4725
                                           self.op.powered == True):
4726
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4727
                                    " offline status can be reset") %
4728
                                   self.op.node_name)
4729
    elif self.op.powered is not None:
4730
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4731
                                  " as it does not support out-of-band"
4732
                                  " handling") % self.op.node_name)
4733

    
4734
    # If we're being deofflined/drained, we'll MC ourself if needed
4735
    if (self.op.drained == False or self.op.offline == False or
4736
        (self.op.master_capable and not node.master_capable)):
4737
      if _DecideSelfPromotion(self):
4738
        self.op.master_candidate = True
4739
        self.LogInfo("Auto-promoting node to master candidate")
4740

    
4741
    # If we're no longer master capable, we'll demote ourselves from MC
4742
    if self.op.master_capable == False and node.master_candidate:
4743
      self.LogInfo("Demoting from master candidate")
4744
      self.op.master_candidate = False
4745

    
4746
    # Compute new role
4747
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4748
    if self.op.master_candidate:
4749
      new_role = self._ROLE_CANDIDATE
4750
    elif self.op.drained:
4751
      new_role = self._ROLE_DRAINED
4752
    elif self.op.offline:
4753
      new_role = self._ROLE_OFFLINE
4754
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4755
      # False is still in new flags, which means we're un-setting (the
4756
      # only) True flag
4757
      new_role = self._ROLE_REGULAR
4758
    else: # no new flags, nothing, keep old role
4759
      new_role = old_role
4760

    
4761
    self.new_role = new_role
4762

    
4763
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4764
      # Trying to transition out of offline status
4765
      result = self.rpc.call_version([node.name])[node.name]
4766
      if result.fail_msg:
4767
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4768
                                   " to report its version: %s" %
4769
                                   (node.name, result.fail_msg),
4770
                                   errors.ECODE_STATE)
4771
      else:
4772
        self.LogWarning("Transitioning node from offline to online state"
4773
                        " without using re-add. Please make sure the node"
4774
                        " is healthy!")
4775

    
4776
    if self.op.secondary_ip:
4777
      # Ok even without locking, because this can't be changed by any LU
4778
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4779
      master_singlehomed = master.secondary_ip == master.primary_ip
4780
      if master_singlehomed and self.op.secondary_ip:
4781
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4782
                                   " homed cluster", errors.ECODE_INVAL)
4783

    
4784
      if node.offline:
4785
        if self.affected_instances:
4786
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4787
                                     " node has instances (%s) configured"
4788
                                     " to use it" % self.affected_instances)
4789
      else:
4790
        # On online nodes, check that no instances are running, and that
4791
        # the node has the new ip and we can reach it.
4792
        for instance in self.affected_instances:
4793
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4794

    
4795
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4796
        if master.name != node.name:
4797
          # check reachability from master secondary ip to new secondary ip
4798
          if not netutils.TcpPing(self.op.secondary_ip,
4799
                                  constants.DEFAULT_NODED_PORT,
4800
                                  source=master.secondary_ip):
4801
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4802
                                       " based ping to node daemon port",
4803
                                       errors.ECODE_ENVIRON)
4804

    
4805
    if self.op.ndparams:
4806
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4807
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4808
      self.new_ndparams = new_ndparams
4809

    
4810
  def Exec(self, feedback_fn):
4811
    """Modifies a node.
4812

4813
    """
4814
    node = self.node
4815
    old_role = self.old_role
4816
    new_role = self.new_role
4817

    
4818
    result = []
4819

    
4820
    if self.op.ndparams:
4821
      node.ndparams = self.new_ndparams
4822

    
4823
    if self.op.powered is not None:
4824
      node.powered = self.op.powered
4825

    
4826
    for attr in ["master_capable", "vm_capable"]:
4827
      val = getattr(self.op, attr)
4828
      if val is not None:
4829
        setattr(node, attr, val)
4830
        result.append((attr, str(val)))
4831

    
4832
    if new_role != old_role:
4833
      # Tell the node to demote itself, if no longer MC and not offline
4834
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4835
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4836
        if msg:
4837
          self.LogWarning("Node failed to demote itself: %s", msg)
4838

    
4839
      new_flags = self._R2F[new_role]
4840
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4841
        if of != nf:
4842
          result.append((desc, str(nf)))
4843
      (node.master_candidate, node.drained, node.offline) = new_flags
4844

    
4845
      # we locked all nodes, we adjust the CP before updating this node
4846
      if self.lock_all:
4847
        _AdjustCandidatePool(self, [node.name])
4848

    
4849
    if self.op.secondary_ip:
4850
      node.secondary_ip = self.op.secondary_ip
4851
      result.append(("secondary_ip", self.op.secondary_ip))
4852

    
4853
    # this will trigger configuration file update, if needed
4854
    self.cfg.Update(node, feedback_fn)
4855

    
4856
    # this will trigger job queue propagation or cleanup if the mc
4857
    # flag changed
4858
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4859
      self.context.ReaddNode(node)
4860

    
4861
    return result
4862

    
4863

    
4864
class LUNodePowercycle(NoHooksLU):
4865
  """Powercycles a node.
4866

4867
  """
4868
  REQ_BGL = False
4869

    
4870
  def CheckArguments(self):
4871
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4872
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4873
      raise errors.OpPrereqError("The node is the master and the force"
4874
                                 " parameter was not set",
4875
                                 errors.ECODE_INVAL)
4876

    
4877
  def ExpandNames(self):
4878
    """Locking for PowercycleNode.
4879

4880
    This is a last-resort option and shouldn't block on other
4881
    jobs. Therefore, we grab no locks.
4882

4883
    """
4884
    self.needed_locks = {}
4885

    
4886
  def Exec(self, feedback_fn):
4887
    """Reboots a node.
4888

4889
    """
4890
    result = self.rpc.call_node_powercycle(self.op.node_name,
4891
                                           self.cfg.GetHypervisorType())
4892
    result.Raise("Failed to schedule the reboot")
4893
    return result.payload
4894

    
4895

    
4896
class LUClusterQuery(NoHooksLU):
4897
  """Query cluster configuration.
4898

4899
  """
4900
  REQ_BGL = False
4901

    
4902
  def ExpandNames(self):
4903
    self.needed_locks = {}
4904

    
4905
  def Exec(self, feedback_fn):
4906
    """Return cluster config.
4907

4908
    """
4909
    cluster = self.cfg.GetClusterInfo()
4910
    os_hvp = {}
4911

    
4912
    # Filter just for enabled hypervisors
4913
    for os_name, hv_dict in cluster.os_hvp.items():
4914
      os_hvp[os_name] = {}
4915
      for hv_name, hv_params in hv_dict.items():
4916
        if hv_name in cluster.enabled_hypervisors:
4917
          os_hvp[os_name][hv_name] = hv_params
4918

    
4919
    # Convert ip_family to ip_version
4920
    primary_ip_version = constants.IP4_VERSION
4921
    if cluster.primary_ip_family == netutils.IP6Address.family:
4922
      primary_ip_version = constants.IP6_VERSION
4923

    
4924
    result = {
4925
      "software_version": constants.RELEASE_VERSION,
4926
      "protocol_version": constants.PROTOCOL_VERSION,
4927
      "config_version": constants.CONFIG_VERSION,
4928
      "os_api_version": max(constants.OS_API_VERSIONS),
4929
      "export_version": constants.EXPORT_VERSION,
4930
      "architecture": (platform.architecture()[0], platform.machine()),
4931
      "name": cluster.cluster_name,
4932
      "master": cluster.master_node,
4933
      "default_hypervisor": cluster.enabled_hypervisors[0],
4934
      "enabled_hypervisors": cluster.enabled_hypervisors,
4935
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4936
                        for hypervisor_name in cluster.enabled_hypervisors]),
4937
      "os_hvp": os_hvp,
4938
      "beparams": cluster.beparams,
4939
      "osparams": cluster.osparams,
4940
      "nicparams": cluster.nicparams,
4941
      "ndparams": cluster.ndparams,
4942
      "candidate_pool_size": cluster.candidate_pool_size,
4943
      "master_netdev": cluster.master_netdev,
4944
      "volume_group_name": cluster.volume_group_name,
4945
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4946
      "file_storage_dir": cluster.file_storage_dir,
4947
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
4948
      "maintain_node_health": cluster.maintain_node_health,
4949
      "ctime": cluster.ctime,
4950
      "mtime": cluster.mtime,
4951
      "uuid": cluster.uuid,
4952
      "tags": list(cluster.GetTags()),
4953
      "uid_pool": cluster.uid_pool,
4954
      "default_iallocator": cluster.default_iallocator,
4955
      "reserved_lvs": cluster.reserved_lvs,
4956
      "primary_ip_version": primary_ip_version,
4957
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4958
      "hidden_os": cluster.hidden_os,
4959
      "blacklisted_os": cluster.blacklisted_os,
4960
      }
4961

    
4962
    return result
4963

    
4964

    
4965
class LUClusterConfigQuery(NoHooksLU):
4966
  """Return configuration values.
4967

4968
  """
4969
  REQ_BGL = False
4970
  _FIELDS_DYNAMIC = utils.FieldSet()
4971
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4972
                                  "watcher_pause", "volume_group_name")
4973

    
4974
  def CheckArguments(self):
4975
    _CheckOutputFields(static=self._FIELDS_STATIC,
4976
                       dynamic=self._FIELDS_DYNAMIC,
4977
                       selected=self.op.output_fields)
4978

    
4979
  def ExpandNames(self):
4980
    self.needed_locks = {}
4981

    
4982
  def Exec(self, feedback_fn):
4983
    """Dump a representation of the cluster config to the standard output.
4984

4985
    """
4986
    values = []
4987
    for field in self.op.output_fields:
4988
      if field == "cluster_name":
4989
        entry = self.cfg.GetClusterName()
4990
      elif field == "master_node":
4991
        entry = self.cfg.GetMasterNode()
4992
      elif field == "drain_flag":
4993
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4994
      elif field == "watcher_pause":
4995
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4996
      elif field == "volume_group_name":
4997
        entry = self.cfg.GetVGName()
4998
      else:
4999
        raise errors.ParameterError(field)
5000
      values.append(entry)
5001
    return values
5002

    
5003

    
5004
class LUInstanceActivateDisks(NoHooksLU):
5005
  """Bring up an instance's disks.
5006

5007
  """
5008
  REQ_BGL = False
5009

    
5010
  def ExpandNames(self):
5011
    self._ExpandAndLockInstance()
5012
    self.needed_locks[locking.LEVEL_NODE] = []
5013
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5014

    
5015
  def DeclareLocks(self, level):
5016
    if level == locking.LEVEL_NODE:
5017
      self._LockInstancesNodes()
5018

    
5019
  def CheckPrereq(self):
5020
    """Check prerequisites.
5021

5022
    This checks that the instance is in the cluster.
5023

5024
    """
5025
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5026
    assert self.instance is not None, \
5027
      "Cannot retrieve locked instance %s" % self.op.instance_name
5028
    _CheckNodeOnline(self, self.instance.primary_node)
5029

    
5030
  def Exec(self, feedback_fn):
5031
    """Activate the disks.
5032

5033
    """
5034
    disks_ok, disks_info = \
5035
              _AssembleInstanceDisks(self, self.instance,
5036
                                     ignore_size=self.op.ignore_size)
5037
    if not disks_ok:
5038
      raise errors.OpExecError("Cannot activate block devices")
5039

    
5040
    return disks_info
5041

    
5042

    
5043
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5044
                           ignore_size=False):
5045
  """Prepare the block devices for an instance.
5046

5047
  This sets up the block devices on all nodes.
5048

5049
  @type lu: L{LogicalUnit}
5050
  @param lu: the logical unit on whose behalf we execute
5051
  @type instance: L{objects.Instance}
5052
  @param instance: the instance for whose disks we assemble
5053
  @type disks: list of L{objects.Disk} or None
5054
  @param disks: which disks to assemble (or all, if None)
5055
  @type ignore_secondaries: boolean
5056
  @param ignore_secondaries: if true, errors on secondary nodes
5057
      won't result in an error return from the function
5058
  @type ignore_size: boolean
5059
  @param ignore_size: if true, the current known size of the disk
5060
      will not be used during the disk activation, useful for cases
5061
      when the size is wrong
5062
  @return: False if the operation failed, otherwise a list of
5063
      (host, instance_visible_name, node_visible_name)
5064
      with the mapping from node devices to instance devices
5065

5066
  """
5067
  device_info = []
5068
  disks_ok = True
5069
  iname = instance.name
5070
  disks = _ExpandCheckDisks(instance, disks)
5071

    
5072
  # With the two passes mechanism we try to reduce the window of
5073
  # opportunity for the race condition of switching DRBD to primary
5074
  # before handshaking occured, but we do not eliminate it
5075

    
5076
  # The proper fix would be to wait (with some limits) until the
5077
  # connection has been made and drbd transitions from WFConnection
5078
  # into any other network-connected state (Connected, SyncTarget,
5079
  # SyncSource, etc.)
5080

    
5081
  # 1st pass, assemble on all nodes in secondary mode
5082
  for idx, inst_disk in enumerate(disks):
5083
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5084
      if ignore_size:
5085
        node_disk = node_disk.Copy()
5086
        node_disk.UnsetSize()
5087
      lu.cfg.SetDiskID(node_disk, node)
5088
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5089
      msg = result.fail_msg
5090
      if msg:
5091
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5092
                           " (is_primary=False, pass=1): %s",
5093
                           inst_disk.iv_name, node, msg)
5094
        if not ignore_secondaries:
5095
          disks_ok = False
5096

    
5097
  # FIXME: race condition on drbd migration to primary
5098

    
5099
  # 2nd pass, do only the primary node
5100
  for idx, inst_disk in enumerate(disks):
5101
    dev_path = None
5102

    
5103
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5104
      if node != instance.primary_node:
5105
        continue
5106
      if ignore_size:
5107
        node_disk = node_disk.Copy()
5108
        node_disk.UnsetSize()
5109
      lu.cfg.SetDiskID(node_disk, node)
5110
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5111
      msg = result.fail_msg
5112
      if msg:
5113
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5114
                           " (is_primary=True, pass=2): %s",
5115
                           inst_disk.iv_name, node, msg)
5116
        disks_ok = False
5117
      else:
5118
        dev_path = result.payload
5119

    
5120
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5121

    
5122
  # leave the disks configured for the primary node
5123
  # this is a workaround that would be fixed better by
5124
  # improving the logical/physical id handling
5125
  for disk in disks:
5126
    lu.cfg.SetDiskID(disk, instance.primary_node)
5127

    
5128
  return disks_ok, device_info
5129

    
5130

    
5131
def _StartInstanceDisks(lu, instance, force):
5132
  """Start the disks of an instance.
5133

5134
  """
5135
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5136
                                           ignore_secondaries=force)
5137
  if not disks_ok:
5138
    _ShutdownInstanceDisks(lu, instance)
5139
    if force is not None and not force:
5140
      lu.proc.LogWarning("", hint="If the message above refers to a"
5141
                         " secondary node,"
5142
                         " you can retry the operation using '--force'.")
5143
    raise errors.OpExecError("Disk consistency error")
5144

    
5145

    
5146
class LUInstanceDeactivateDisks(NoHooksLU):
5147
  """Shutdown an instance's disks.
5148

5149
  """
5150
  REQ_BGL = False
5151

    
5152
  def ExpandNames(self):
5153
    self._ExpandAndLockInstance()
5154
    self.needed_locks[locking.LEVEL_NODE] = []
5155
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5156

    
5157
  def DeclareLocks(self, level):
5158
    if level == locking.LEVEL_NODE:
5159
      self._LockInstancesNodes()
5160

    
5161
  def CheckPrereq(self):
5162
    """Check prerequisites.
5163

5164
    This checks that the instance is in the cluster.
5165

5166
    """
5167
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5168
    assert self.instance is not None, \
5169
      "Cannot retrieve locked instance %s" % self.op.instance_name
5170

    
5171
  def Exec(self, feedback_fn):
5172
    """Deactivate the disks
5173

5174
    """
5175
    instance = self.instance
5176
    if self.op.force:
5177
      _ShutdownInstanceDisks(self, instance)
5178
    else:
5179
      _SafeShutdownInstanceDisks(self, instance)
5180

    
5181

    
5182
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5183
  """Shutdown block devices of an instance.
5184

5185
  This function checks if an instance is running, before calling
5186
  _ShutdownInstanceDisks.
5187

5188
  """
5189
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5190
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5191

    
5192

    
5193
def _ExpandCheckDisks(instance, disks):
5194
  """Return the instance disks selected by the disks list
5195

5196
  @type disks: list of L{objects.Disk} or None
5197
  @param disks: selected disks
5198
  @rtype: list of L{objects.Disk}
5199
  @return: selected instance disks to act on
5200

5201
  """
5202
  if disks is None:
5203
    return instance.disks
5204
  else:
5205
    if not set(disks).issubset(instance.disks):
5206
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5207
                                   " target instance")
5208
    return disks
5209

    
5210

    
5211
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5212
  """Shutdown block devices of an instance.
5213

5214
  This does the shutdown on all nodes of the instance.
5215

5216
  If the ignore_primary is false, errors on the primary node are
5217
  ignored.
5218

5219
  """
5220
  all_result = True
5221
  disks = _ExpandCheckDisks(instance, disks)
5222

    
5223
  for disk in disks:
5224
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5225
      lu.cfg.SetDiskID(top_disk, node)
5226
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5227
      msg = result.fail_msg
5228
      if msg:
5229
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5230
                      disk.iv_name, node, msg)
5231
        if ((node == instance.primary_node and not ignore_primary) or
5232
            (node != instance.primary_node and not result.offline)):
5233
          all_result = False
5234
  return all_result
5235

    
5236

    
5237
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5238
  """Checks if a node has enough free memory.
5239

5240
  This function check if a given node has the needed amount of free
5241
  memory. In case the node has less memory or we cannot get the
5242
  information from the node, this function raise an OpPrereqError
5243
  exception.
5244

5245
  @type lu: C{LogicalUnit}
5246
  @param lu: a logical unit from which we get configuration data
5247
  @type node: C{str}
5248
  @param node: the node to check
5249
  @type reason: C{str}
5250
  @param reason: string to use in the error message
5251
  @type requested: C{int}
5252
  @param requested: the amount of memory in MiB to check for
5253
  @type hypervisor_name: C{str}
5254
  @param hypervisor_name: the hypervisor to ask for memory stats
5255
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5256
      we cannot check the node
5257

5258
  """
5259
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5260
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5261
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5262
  free_mem = nodeinfo[node].payload.get('memory_free', None)
5263
  if not isinstance(free_mem, int):
5264
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5265
                               " was '%s'" % (node, free_mem),
5266
                               errors.ECODE_ENVIRON)
5267
  if requested > free_mem:
5268
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5269
                               " needed %s MiB, available %s MiB" %
5270
                               (node, reason, requested, free_mem),
5271
                               errors.ECODE_NORES)
5272

    
5273

    
5274
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5275
  """Checks if nodes have enough free disk space in the all VGs.
5276

5277
  This function check if all given nodes have the needed amount of
5278
  free disk. In case any node has less disk or we cannot get the
5279
  information from the node, this function raise an OpPrereqError
5280
  exception.
5281

5282
  @type lu: C{LogicalUnit}
5283
  @param lu: a logical unit from which we get configuration data
5284
  @type nodenames: C{list}
5285
  @param nodenames: the list of node names to check
5286
  @type req_sizes: C{dict}
5287
  @param req_sizes: the hash of vg and corresponding amount of disk in
5288
      MiB to check for
5289
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5290
      or we cannot check the node
5291

5292
  """
5293
  for vg, req_size in req_sizes.items():
5294
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5295

    
5296

    
5297
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5298
  """Checks if nodes have enough free disk space in the specified VG.
5299

5300
  This function check if all given nodes have the needed amount of
5301
  free disk. In case any node has less disk or we cannot get the
5302
  information from the node, this function raise an OpPrereqError
5303
  exception.
5304

5305
  @type lu: C{LogicalUnit}
5306
  @param lu: a logical unit from which we get configuration data
5307
  @type nodenames: C{list}
5308
  @param nodenames: the list of node names to check
5309
  @type vg: C{str}
5310
  @param vg: the volume group to check
5311
  @type requested: C{int}
5312
  @param requested: the amount of disk in MiB to check for
5313
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5314
      or we cannot check the node
5315

5316
  """
5317
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5318
  for node in nodenames:
5319
    info = nodeinfo[node]
5320
    info.Raise("Cannot get current information from node %s" % node,
5321
               prereq=True, ecode=errors.ECODE_ENVIRON)
5322
    vg_free = info.payload.get("vg_free", None)
5323
    if not isinstance(vg_free, int):
5324
      raise errors.OpPrereqError("Can't compute free disk space on node"
5325
                                 " %s for vg %s, result was '%s'" %
5326
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5327
    if requested > vg_free:
5328
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5329
                                 " vg %s: required %d MiB, available %d MiB" %
5330
                                 (node, vg, requested, vg_free),
5331
                                 errors.ECODE_NORES)
5332

    
5333

    
5334
class LUInstanceStartup(LogicalUnit):
5335
  """Starts an instance.
5336

5337
  """
5338
  HPATH = "instance-start"
5339
  HTYPE = constants.HTYPE_INSTANCE
5340
  REQ_BGL = False
5341

    
5342
  def CheckArguments(self):
5343
    # extra beparams
5344
    if self.op.beparams:
5345
      # fill the beparams dict
5346
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5347

    
5348
  def ExpandNames(self):
5349
    self._ExpandAndLockInstance()
5350

    
5351
  def BuildHooksEnv(self):
5352
    """Build hooks env.
5353

5354
    This runs on master, primary and secondary nodes of the instance.
5355

5356
    """
5357
    env = {
5358
      "FORCE": self.op.force,
5359
      }
5360

    
5361
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5362

    
5363
    return env
5364

    
5365
  def BuildHooksNodes(self):
5366
    """Build hooks nodes.
5367

5368
    """
5369
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5370
    return (nl, nl)
5371

    
5372
  def CheckPrereq(self):
5373
    """Check prerequisites.
5374

5375
    This checks that the instance is in the cluster.
5376

5377
    """
5378
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5379
    assert self.instance is not None, \
5380
      "Cannot retrieve locked instance %s" % self.op.instance_name
5381

    
5382
    # extra hvparams
5383
    if self.op.hvparams:
5384
      # check hypervisor parameter syntax (locally)
5385
      cluster = self.cfg.GetClusterInfo()
5386
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5387
      filled_hvp = cluster.FillHV(instance)
5388
      filled_hvp.update(self.op.hvparams)
5389
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5390
      hv_type.CheckParameterSyntax(filled_hvp)
5391
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5392

    
5393
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5394

    
5395
    if self.primary_offline and self.op.ignore_offline_nodes:
5396
      self.proc.LogWarning("Ignoring offline primary node")
5397

    
5398
      if self.op.hvparams or self.op.beparams:
5399
        self.proc.LogWarning("Overridden parameters are ignored")
5400
    else:
5401
      _CheckNodeOnline(self, instance.primary_node)
5402

    
5403
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5404

    
5405
      # check bridges existence
5406
      _CheckInstanceBridgesExist(self, instance)
5407

    
5408
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5409
                                                instance.name,
5410
                                                instance.hypervisor)
5411
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5412
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5413
      if not remote_info.payload: # not running already
5414
        _CheckNodeFreeMemory(self, instance.primary_node,
5415
                             "starting instance %s" % instance.name,
5416
                             bep[constants.BE_MEMORY], instance.hypervisor)
5417

    
5418
  def Exec(self, feedback_fn):
5419
    """Start the instance.
5420

5421
    """
5422
    instance = self.instance
5423
    force = self.op.force
5424

    
5425
    self.cfg.MarkInstanceUp(instance.name)
5426

    
5427
    if self.primary_offline:
5428
      assert self.op.ignore_offline_nodes
5429
      self.proc.LogInfo("Primary node offline, marked instance as started")
5430
    else:
5431
      node_current = instance.primary_node
5432

    
5433
      _StartInstanceDisks(self, instance, force)
5434

    
5435
      result = self.rpc.call_instance_start(node_current, instance,
5436
                                            self.op.hvparams, self.op.beparams)
5437
      msg = result.fail_msg
5438
      if msg:
5439
        _ShutdownInstanceDisks(self, instance)
5440
        raise errors.OpExecError("Could not start instance: %s" % msg)
5441

    
5442

    
5443
class LUInstanceReboot(LogicalUnit):
5444
  """Reboot an instance.
5445

5446
  """
5447
  HPATH = "instance-reboot"
5448
  HTYPE = constants.HTYPE_INSTANCE
5449
  REQ_BGL = False
5450

    
5451
  def ExpandNames(self):
5452
    self._ExpandAndLockInstance()
5453

    
5454
  def BuildHooksEnv(self):
5455
    """Build hooks env.
5456

5457
    This runs on master, primary and secondary nodes of the instance.
5458

5459
    """
5460
    env = {
5461
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5462
      "REBOOT_TYPE": self.op.reboot_type,
5463
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5464
      }
5465

    
5466
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5467

    
5468
    return env
5469

    
5470
  def BuildHooksNodes(self):
5471
    """Build hooks nodes.
5472

5473
    """
5474
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5475
    return (nl, nl)
5476

    
5477
  def CheckPrereq(self):
5478
    """Check prerequisites.
5479

5480
    This checks that the instance is in the cluster.
5481

5482
    """
5483
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5484
    assert self.instance is not None, \
5485
      "Cannot retrieve locked instance %s" % self.op.instance_name
5486

    
5487
    _CheckNodeOnline(self, instance.primary_node)
5488

    
5489
    # check bridges existence
5490
    _CheckInstanceBridgesExist(self, instance)
5491

    
5492
  def Exec(self, feedback_fn):
5493
    """Reboot the instance.
5494

5495
    """
5496
    instance = self.instance
5497
    ignore_secondaries = self.op.ignore_secondaries
5498
    reboot_type = self.op.reboot_type
5499

    
5500
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5501
                                              instance.name,
5502
                                              instance.hypervisor)
5503
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5504
    instance_running = bool(remote_info.payload)
5505

    
5506
    node_current = instance.primary_node
5507

    
5508
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5509
                                            constants.INSTANCE_REBOOT_HARD]:
5510
      for disk in instance.disks:
5511
        self.cfg.SetDiskID(disk, node_current)
5512
      result = self.rpc.call_instance_reboot(node_current, instance,
5513
                                             reboot_type,
5514
                                             self.op.shutdown_timeout)
5515
      result.Raise("Could not reboot instance")
5516
    else:
5517
      if instance_running:
5518
        result = self.rpc.call_instance_shutdown(node_current, instance,
5519
                                                 self.op.shutdown_timeout)
5520
        result.Raise("Could not shutdown instance for full reboot")
5521
        _ShutdownInstanceDisks(self, instance)
5522
      else:
5523
        self.LogInfo("Instance %s was already stopped, starting now",
5524
                     instance.name)
5525
      _StartInstanceDisks(self, instance, ignore_secondaries)
5526
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5527
      msg = result.fail_msg
5528
      if msg:
5529
        _ShutdownInstanceDisks(self, instance)
5530
        raise errors.OpExecError("Could not start instance for"
5531
                                 " full reboot: %s" % msg)
5532

    
5533
    self.cfg.MarkInstanceUp(instance.name)
5534

    
5535

    
5536
class LUInstanceShutdown(LogicalUnit):
5537
  """Shutdown an instance.
5538

5539
  """
5540
  HPATH = "instance-stop"
5541
  HTYPE = constants.HTYPE_INSTANCE
5542
  REQ_BGL = False
5543

    
5544
  def ExpandNames(self):
5545
    self._ExpandAndLockInstance()
5546

    
5547
  def BuildHooksEnv(self):
5548
    """Build hooks env.
5549

5550
    This runs on master, primary and secondary nodes of the instance.
5551

5552
    """
5553
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5554
    env["TIMEOUT"] = self.op.timeout
5555
    return env
5556

    
5557
  def BuildHooksNodes(self):
5558
    """Build hooks nodes.
5559

5560
    """
5561
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5562
    return (nl, nl)
5563

    
5564
  def CheckPrereq(self):
5565
    """Check prerequisites.
5566

5567
    This checks that the instance is in the cluster.
5568

5569
    """
5570
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5571
    assert self.instance is not None, \
5572
      "Cannot retrieve locked instance %s" % self.op.instance_name
5573

    
5574
    self.primary_offline = \
5575
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5576

    
5577
    if self.primary_offline and self.op.ignore_offline_nodes:
5578
      self.proc.LogWarning("Ignoring offline primary node")
5579
    else:
5580
      _CheckNodeOnline(self, self.instance.primary_node)
5581

    
5582
  def Exec(self, feedback_fn):
5583
    """Shutdown the instance.
5584

5585
    """
5586
    instance = self.instance
5587
    node_current = instance.primary_node
5588
    timeout = self.op.timeout
5589

    
5590
    self.cfg.MarkInstanceDown(instance.name)
5591

    
5592
    if self.primary_offline:
5593
      assert self.op.ignore_offline_nodes
5594
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5595
    else:
5596
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5597
      msg = result.fail_msg
5598
      if msg:
5599
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5600

    
5601
      _ShutdownInstanceDisks(self, instance)
5602

    
5603

    
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


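# Removal helper used by LUInstanceRemove above: the disks are removed first,
# then the instance is dropped from the cluster configuration, and finally
# its lock is scheduled for removal.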
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


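# The failover and migration LUs below share one implementation: each builds
# a TLMigrateInstance tasklet (defined further down in this module) and only
# differs in the parameters it passes to that tasklet.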
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


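# Unlike failover/migration, which reuse an existing disk mirror, the move LU
# below copies the disk data: the instance is shut down, new disks are
# created on the target node, every block device is exported/imported, and
# the instance is restarted only if it was marked as up.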
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


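# Node-level migration is a thin wrapper: one TLMigrateInstance tasklet is
# created per primary instance of the node, and all node locks are acquired
# whenever any of those instances uses an externally mirrored disk template,
# since the iallocator may then pick arbitrary destination nodes.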
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    self.lock_all_nodes = False

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))

      if inst.disk_template in constants.DTS_EXT_MIRROR:
        # We need to lock all nodes, as the iallocator will choose the
        # destination nodes afterwards
        self.lock_all_nodes = True

    self.tasklets = tasklets

    # Declare node locks
    if self.lock_all_nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)


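# TLMigrateInstance implements the actual failover/migration logic on behalf
# of LUInstanceFailover, LUInstanceMigrate and LUNodeMigrate; the owning LU
# only deals with locking and hooks.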
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, timeout of the shutdown

  """
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

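  # For externally mirrored disk templates the target node may be chosen by
  # an iallocator run in relocation mode; exactly one replacement node is
  # expected back and becomes self.target_node.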
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance_name, self.lu.op.iallocator,
                 utils.CommaJoin(ial.result))

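  # Each call_drbd_wait_sync reply carries a (done, sync_percent) payload per
  # node; the loop below polls every two seconds until all nodes report the
  # resync as finished, showing the slowest node's progress in between.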
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to clean up after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

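  # Entry point of the tasklet: depending on how it was constructed this
  # either fails the instance over, cleans up after a previously interrupted
  # migration, or performs the (live or non-live) migration proper.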
  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


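# Note on the recursion below: whenever a device reports CreateOnSecondary()
# (i.e. it must also exist on secondary nodes), force_create is switched on
# for it and its whole subtree; devices reached with force_create still False
# are skipped and only their qualifying descendants are created.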
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


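# The unique IDs below come from the configuration's ID generator (typically
# UUID-style strings); combined with the ".disk%d" suffixes used by the
# callers this yields LV names roughly of the form "<unique-id>.disk0",
# "<unique-id>.disk1", and so on.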
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate logical volume names for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


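# One DRBD8 "branch" is a single mirrored disk: a data LV of the requested
# size plus a small fixed-size metadata LV (128 MiB) as children, wrapped in
# a DRBD8 device whose logical_id records both nodes, the network port, the
# two minors and the shared secret.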
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


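# Illustrative sketch (not part of the original module): for a 1024 MiB
# disk, _GenerateDRBD8Branch above returns a tree roughly like
#
#   Disk(LD_DRBD8, size=1024,
#        logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#        children=[Disk(LD_LV, size=1024, logical_id=(data_vg, "<id>_data")),
#                  Disk(LD_LV, size=128, logical_id=(meta_vg, "<id>_meta"))])
#
# i.e. the data LV matches the requested size while the DRBD metadata LV
# is a fixed 128 MB volume.

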
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


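# Worked example (illustrative, not part of the original module): if 512
# MiB were written in 30 seconds out of a 4096 MiB total, _CalcEta above
# computes avg_time = 30 / 512.0 and returns (4096 - 512) * avg_time,
# i.e. 210 seconds of estimated remaining time.

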
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)


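# Worked example (illustrative, not part of the original module),
# assuming MIN_WIPE_CHUNK_PERCENT is 10: for a 50000 MiB disk, _WipeDisks
# above uses min(constants.MAX_WIPE_CHUNK, 50000 / 100.0 * 10) as the
# chunk size, so MAX_WIPE_CHUNK caps the per-RPC wipe size while smaller
# disks are wiped in roughly ten chunks.

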
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


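# Illustrative note (not part of the original module): in _CreateDisks
# above, f_create is True only on the primary node (node == pnode), so on
# a DRBD8 secondary the devices are created only where CreateOnSecondary()
# flips force_create, and force_open is likewise only set on the primary.

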
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


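# Worked example (illustrative, not part of the original module): for two
# DRBD8 disks of 1024 MiB and 2048 MiB both using volume group "xenvg",
# _ComputeDiskSizePerVG above returns {"xenvg": 3328}, i.e. the disk
# sizes plus 128 MB of DRBD metadata per disk, accumulated per volume
# group.

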
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


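# Worked example (illustrative, not part of the original module): for two
# disks of 1024 MiB and 2048 MiB, _ComputeDiskSize above returns
# 1024 + 128 + 2048 + 128 = 3328 for DT_DRBD8, 3072 for DT_PLAIN, and
# None for diskless or file-based templates.

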
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      file_storage_dir = utils.PathJoin(get_fsd_fn(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
8359
          if not image:
8360
            continue
8361

    
8362
          # FIXME: pass debug option from opcode to backend
8363
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8364
                                             constants.IEIO_FILE, (image, ),
8365
                                             constants.IEIO_SCRIPT,
8366
                                             (iobj.disks[idx], idx),
8367
                                             None)
8368
          transfers.append(dt)
8369

    
8370
        import_result = \
8371
          masterd.instance.TransferInstanceData(self, feedback_fn,
8372
                                                self.op.src_node, pnode_name,
8373
                                                self.pnode.secondary_ip,
8374
                                                iobj, transfers)
8375
        if not compat.all(import_result):
8376
          self.LogWarning("Some disks for instance %s on node %s were not"
8377
                          " imported successfully" % (instance, pnode_name))
8378

    
8379
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8380
        feedback_fn("* preparing remote import...")
8381
        # The source cluster will stop the instance before attempting to make a
8382
        # connection. In some cases stopping an instance can take a long time,
8383
        # hence the shutdown timeout is added to the connection timeout.
8384
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8385
                           self.op.source_shutdown_timeout)
8386
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8387

    
8388
        assert iobj.primary_node == self.pnode.name
8389
        disk_results = \
8390
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8391
                                        self.source_x509_ca,
8392
                                        self._cds, timeouts)
8393
        if not compat.all(disk_results):
8394
          # TODO: Should the instance still be started, even if some disks
8395
          # failed to import (valid for local imports, too)?
8396
          self.LogWarning("Some disks for instance %s on node %s were not"
8397
                          " imported successfully" % (instance, pnode_name))
8398

    
8399
        # Run rename script on newly imported instance
8400
        assert iobj.name == instance
8401
        feedback_fn("Running rename script for %s" % instance)
8402
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8403
                                                   self.source_instance_name,
8404
                                                   self.op.debug_level)
8405
        if result.fail_msg:
8406
          self.LogWarning("Failed to run rename script for %s on node"
8407
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8408

    
8409
      else:
8410
        # also checked in the prereq part
8411
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8412
                                     % self.op.mode)
8413

    
8414
    if self.op.start:
8415
      iobj.admin_up = True
8416
      self.cfg.Update(iobj, feedback_fn)
8417
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8418
      feedback_fn("* starting instance...")
8419
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8420
      result.Raise("Could not start instance")
8421

    
8422
    return list(iobj.all_nodes)
8423

    
8424

    
8425
class LUInstanceConsole(NoHooksLU):
8426
  """Connect to an instance's console.
8427

8428
  This is somewhat special in that it returns the command line that
8429
  you need to run on the master node in order to connect to the
8430
  console.
8431

8432
  """
8433
  REQ_BGL = False
8434

    
8435
  def ExpandNames(self):
8436
    self._ExpandAndLockInstance()
8437

    
8438
  def CheckPrereq(self):
8439
    """Check prerequisites.
8440

8441
    This checks that the instance is in the cluster.
8442

8443
    """
8444
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8445
    assert self.instance is not None, \
8446
      "Cannot retrieve locked instance %s" % self.op.instance_name
8447
    _CheckNodeOnline(self, self.instance.primary_node)
8448

    
8449
  def Exec(self, feedback_fn):
8450
    """Connect to the console of an instance
8451

8452
    """
8453
    instance = self.instance
8454
    node = instance.primary_node
8455

    
8456
    node_insts = self.rpc.call_instance_list([node],
8457
                                             [instance.hypervisor])[node]
8458
    node_insts.Raise("Can't get node information from %s" % node)
8459

    
8460
    if instance.name not in node_insts.payload:
8461
      if instance.admin_up:
8462
        state = constants.INSTST_ERRORDOWN
8463
      else:
8464
        state = constants.INSTST_ADMINDOWN
8465
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8466
                               (instance.name, state))
8467

    
8468
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8469

    
8470
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8471

    
8472

    
8473
def _GetInstanceConsole(cluster, instance):
8474
  """Returns console information for an instance.
8475

8476
  @type cluster: L{objects.Cluster}
8477
  @type instance: L{objects.Instance}
8478
  @rtype: dict
8479

8480
  """
8481
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
8482
  # beparams and hvparams are passed separately, to avoid editing the
8483
  # instance and then saving the defaults in the instance itself.
8484
  hvparams = cluster.FillHV(instance)
8485
  beparams = cluster.FillBE(instance)
8486
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8487

    
8488
  assert console.instance == instance.name
8489
  assert console.Validate()
8490

    
8491
  return console.ToDict()
8492

    
8493

    
8494
class LUInstanceReplaceDisks(LogicalUnit):
8495
  """Replace the disks of an instance.
8496

8497
  """
8498
  HPATH = "mirrors-replace"
8499
  HTYPE = constants.HTYPE_INSTANCE
8500
  REQ_BGL = False
8501

    
8502
  def CheckArguments(self):
8503
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8504
                                  self.op.iallocator)
8505

    
8506
  def ExpandNames(self):
8507
    self._ExpandAndLockInstance()
8508

    
8509
    assert locking.LEVEL_NODE not in self.needed_locks
8510
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8511

    
8512
    assert self.op.iallocator is None or self.op.remote_node is None, \
8513
      "Conflicting options"
8514

    
8515
    if self.op.remote_node is not None:
8516
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8517

    
8518
      # Warning: do not remove the locking of the new secondary here
8519
      # unless DRBD8.AddChildren is changed to work in parallel;
8520
      # currently it doesn't since parallel invocations of
8521
      # FindUnusedMinor will conflict
8522
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8523
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8524
    else:
8525
      self.needed_locks[locking.LEVEL_NODE] = []
8526
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8527

    
8528
      if self.op.iallocator is not None:
8529
        # iallocator will select a new node in the same group
8530
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
8531

    
8532
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8533
                                   self.op.iallocator, self.op.remote_node,
8534
                                   self.op.disks, False, self.op.early_release)
8535

    
8536
    self.tasklets = [self.replacer]
8537

    
8538
  def DeclareLocks(self, level):
8539
    if level == locking.LEVEL_NODEGROUP:
8540
      assert self.op.remote_node is None
8541
      assert self.op.iallocator is not None
8542
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8543

    
8544
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
8545
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
8546
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8547

    
8548
    elif level == locking.LEVEL_NODE:
8549
      if self.op.iallocator is not None:
8550
        assert self.op.remote_node is None
8551
        assert not self.needed_locks[locking.LEVEL_NODE]
8552

    
8553
        # Lock member nodes of all locked groups
8554
        self.needed_locks[locking.LEVEL_NODE] = [node_name
8555
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8556
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8557
      else:
8558
        self._LockInstancesNodes()
8559

    
8560
  def BuildHooksEnv(self):
8561
    """Build hooks env.
8562

8563
    This runs on the master, the primary and all the secondaries.
8564

8565
    """
8566
    instance = self.replacer.instance
8567
    env = {
8568
      "MODE": self.op.mode,
8569
      "NEW_SECONDARY": self.op.remote_node,
8570
      "OLD_SECONDARY": instance.secondary_nodes[0],
8571
      }
8572
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8573
    return env
8574

    
8575
  def BuildHooksNodes(self):
8576
    """Build hooks nodes.
8577

8578
    """
8579
    instance = self.replacer.instance
8580
    nl = [
8581
      self.cfg.GetMasterNode(),
8582
      instance.primary_node,
8583
      ]
8584
    if self.op.remote_node is not None:
8585
      nl.append(self.op.remote_node)
8586
    return nl, nl
8587

    
8588
  def CheckPrereq(self):
8589
    """Check prerequisites.
8590

8591
    """
8592
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8593
            self.op.iallocator is None)
8594

    
8595
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8596
    if owned_groups:
8597
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8598
      if owned_groups != groups:
8599
        raise errors.OpExecError("Node groups used by instance '%s' changed"
8600
                                 " since lock was acquired, current list is %r,"
8601
                                 " used to be '%s'" %
8602
                                 (self.op.instance_name,
8603
                                  utils.CommaJoin(groups),
8604
                                  utils.CommaJoin(owned_groups)))
8605

    
8606
    return LogicalUnit.CheckPrereq(self)
8607

    
8608

    
8609
class TLReplaceDisks(Tasklet):
8610
  """Replaces disks for an instance.
8611

8612
  Note: Locking is not within the scope of this class.
8613

8614
  """
8615
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8616
               disks, delay_iallocator, early_release):
8617
    """Initializes this class.
8618

8619
    """
8620
    Tasklet.__init__(self, lu)
8621

    
8622
    # Parameters
8623
    self.instance_name = instance_name
8624
    self.mode = mode
8625
    self.iallocator_name = iallocator_name
8626
    self.remote_node = remote_node
8627
    self.disks = disks
8628
    self.delay_iallocator = delay_iallocator
8629
    self.early_release = early_release
8630

    
8631
    # Runtime data
8632
    self.instance = None
8633
    self.new_node = None
8634
    self.target_node = None
8635
    self.other_node = None
8636
    self.remote_node_info = None
8637
    self.node_secondary_ip = None
8638

    
8639
  @staticmethod
8640
  def CheckArguments(mode, remote_node, iallocator):
8641
    """Helper function for users of this class.
8642

8643
    """
8644
    # check for valid parameter combination
8645
    if mode == constants.REPLACE_DISK_CHG:
8646
      if remote_node is None and iallocator is None:
8647
        raise errors.OpPrereqError("When changing the secondary either an"
8648
                                   " iallocator script must be used or the"
8649
                                   " new node given", errors.ECODE_INVAL)
8650

    
8651
      if remote_node is not None and iallocator is not None:
8652
        raise errors.OpPrereqError("Give either the iallocator or the new"
8653
                                   " secondary, not both", errors.ECODE_INVAL)
8654

    
8655
    elif remote_node is not None or iallocator is not None:
8656
      # Not replacing the secondary
8657
      raise errors.OpPrereqError("The iallocator and new node options can"
8658
                                 " only be used when changing the"
8659
                                 " secondary node", errors.ECODE_INVAL)
8660

    
8661
  @staticmethod
8662
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8663
    """Compute a new secondary node using an IAllocator.
8664

8665
    """
8666
    ial = IAllocator(lu.cfg, lu.rpc,
8667
                     mode=constants.IALLOCATOR_MODE_RELOC,
8668
                     name=instance_name,
8669
                     relocate_from=relocate_from)
8670

    
8671
    ial.Run(iallocator_name)
8672

    
8673
    if not ial.success:
8674
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8675
                                 " %s" % (iallocator_name, ial.info),
8676
                                 errors.ECODE_NORES)
8677

    
8678
    if len(ial.result) != ial.required_nodes:
8679
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8680
                                 " of nodes (%s), required %s" %
8681
                                 (iallocator_name,
8682
                                  len(ial.result), ial.required_nodes),
8683
                                 errors.ECODE_FAULT)
8684

    
8685
    remote_node_name = ial.result[0]
8686

    
8687
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8688
               instance_name, remote_node_name)
8689

    
8690
    return remote_node_name
8691

    
8692
  def _FindFaultyDisks(self, node_name):
8693
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8694
                                    node_name, True)
8695

    
8696
  def _CheckDisksActivated(self, instance):
8697
    """Checks if the instance disks are activated.
8698

8699
    @param instance: The instance to check disks
8700
    @return: True if they are activated, False otherwise
8701

8702
    """
8703
    nodes = instance.all_nodes
8704

    
8705
    for idx, dev in enumerate(instance.disks):
8706
      for node in nodes:
8707
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8708
        self.cfg.SetDiskID(dev, node)
8709

    
8710
        result = self.rpc.call_blockdev_find(node, dev)
8711

    
8712
        if result.offline:
8713
          continue
8714
        elif result.fail_msg or not result.payload:
8715
          return False
8716

    
8717
    return True
8718

    
8719
  def CheckPrereq(self):
8720
    """Check prerequisites.
8721

8722
    This checks that the instance is in the cluster.
8723

8724
    """
8725
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8726
    assert instance is not None, \
8727
      "Cannot retrieve locked instance %s" % self.instance_name
8728

    
8729
    if instance.disk_template != constants.DT_DRBD8:
8730
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8731
                                 " instances", errors.ECODE_INVAL)
8732

    
8733
    if len(instance.secondary_nodes) != 1:
8734
      raise errors.OpPrereqError("The instance has a strange layout,"
8735
                                 " expected one secondary but found %d" %
8736
                                 len(instance.secondary_nodes),
8737
                                 errors.ECODE_FAULT)
8738

    
8739
    if not self.delay_iallocator:
8740
      self._CheckPrereq2()
8741

    
8742
  def _CheckPrereq2(self):
8743
    """Check prerequisites, second part.
8744

8745
    This function should always be part of CheckPrereq. It was separated and is
8746
    now called from Exec because during node evacuation iallocator was only
8747
    called with an unmodified cluster model, not taking planned changes into
8748
    account.
8749

8750
    """
8751
    instance = self.instance
8752
    secondary_node = instance.secondary_nodes[0]
8753

    
8754
    if self.iallocator_name is None:
8755
      remote_node = self.remote_node
8756
    else:
8757
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8758
                                       instance.name, instance.secondary_nodes)
8759

    
8760
    if remote_node is None:
8761
      self.remote_node_info = None
8762
    else:
8763
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
8764
             "Remote node '%s' is not locked" % remote_node
8765

    
8766
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8767
      assert self.remote_node_info is not None, \
8768
        "Cannot retrieve locked node %s" % remote_node
8769

    
8770
    if remote_node == self.instance.primary_node:
8771
      raise errors.OpPrereqError("The specified node is the primary node of"
8772
                                 " the instance", errors.ECODE_INVAL)
8773

    
8774
    if remote_node == secondary_node:
8775
      raise errors.OpPrereqError("The specified node is already the"
8776
                                 " secondary node of the instance",
8777
                                 errors.ECODE_INVAL)
8778

    
8779
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8780
                                    constants.REPLACE_DISK_CHG):
8781
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8782
                                 errors.ECODE_INVAL)
8783

    
8784
    if self.mode == constants.REPLACE_DISK_AUTO:
8785
      if not self._CheckDisksActivated(instance):
8786
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
8787
                                   " first" % self.instance_name,
8788
                                   errors.ECODE_STATE)
8789
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8790
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8791

    
8792
      if faulty_primary and faulty_secondary:
8793
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8794
                                   " one node and can not be repaired"
8795
                                   " automatically" % self.instance_name,
8796
                                   errors.ECODE_STATE)
8797

    
8798
      if faulty_primary:
8799
        self.disks = faulty_primary
8800
        self.target_node = instance.primary_node
8801
        self.other_node = secondary_node
8802
        check_nodes = [self.target_node, self.other_node]
8803
      elif faulty_secondary:
8804
        self.disks = faulty_secondary
8805
        self.target_node = secondary_node
8806
        self.other_node = instance.primary_node
8807
        check_nodes = [self.target_node, self.other_node]
8808
      else:
8809
        self.disks = []
8810
        check_nodes = []
8811

    
8812
    else:
8813
      # Non-automatic modes
8814
      if self.mode == constants.REPLACE_DISK_PRI:
8815
        self.target_node = instance.primary_node
8816
        self.other_node = secondary_node
8817
        check_nodes = [self.target_node, self.other_node]
8818

    
8819
      elif self.mode == constants.REPLACE_DISK_SEC:
8820
        self.target_node = secondary_node
8821
        self.other_node = instance.primary_node
8822
        check_nodes = [self.target_node, self.other_node]
8823

    
8824
      elif self.mode == constants.REPLACE_DISK_CHG:
8825
        self.new_node = remote_node
8826
        self.other_node = instance.primary_node
8827
        self.target_node = secondary_node
8828
        check_nodes = [self.new_node, self.other_node]
8829

    
8830
        _CheckNodeNotDrained(self.lu, remote_node)
8831
        _CheckNodeVmCapable(self.lu, remote_node)
8832

    
8833
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8834
        assert old_node_info is not None
8835
        if old_node_info.offline and not self.early_release:
8836
          # doesn't make sense to delay the release
8837
          self.early_release = True
8838
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8839
                          " early-release mode", secondary_node)
8840

    
8841
      else:
8842
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8843
                                     self.mode)
8844

    
8845
      # If not specified all disks should be replaced
8846
      if not self.disks:
8847
        self.disks = range(len(self.instance.disks))
8848

    
8849
    for node in check_nodes:
8850
      _CheckNodeOnline(self.lu, node)
8851

    
8852
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
8853
                                                          self.other_node,
8854
                                                          self.target_node]
8855
                              if node_name is not None)
8856

    
8857
    # Release unneeded node locks
8858
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
8859

    
8860
    # Release any owned node group
8861
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
8862
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
8863

    
8864
    # Check whether disks are valid
8865
    for disk_idx in self.disks:
8866
      instance.FindDisk(disk_idx)
8867

    
8868
    # Get secondary node IP addresses
8869
    self.node_secondary_ip = \
8870
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
8871
           for node_name in touched_nodes)
8872

    
8873
  def Exec(self, feedback_fn):
8874
    """Execute disk replacement.
8875

8876
    This dispatches the disk replacement to the appropriate handler.
8877

8878
    """
8879
    if self.delay_iallocator:
8880
      self._CheckPrereq2()
8881

    
8882
    if __debug__:
8883
      # Verify owned locks before starting operation
8884
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
8885
      assert set(owned_locks) == set(self.node_secondary_ip), \
8886
          ("Incorrect node locks, owning %s, expected %s" %
8887
           (owned_locks, self.node_secondary_ip.keys()))
8888

    
8889
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
8890
      assert list(owned_locks) == [self.instance_name], \
8891
          "Instance '%s' not locked" % self.instance_name
8892

    
8893
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
8894
          "Should not own any node group lock at this point"
8895

    
8896
    if not self.disks:
8897
      feedback_fn("No disks need replacement")
8898
      return
8899

    
8900
    feedback_fn("Replacing disk(s) %s for %s" %
8901
                (utils.CommaJoin(self.disks), self.instance.name))
8902

    
8903
    activate_disks = (not self.instance.admin_up)
8904

    
8905
    # Activate the instance disks if we're replacing them on a down instance
8906
    if activate_disks:
8907
      _StartInstanceDisks(self.lu, self.instance, True)
8908

    
8909
    try:
8910
      # Should we replace the secondary node?
8911
      if self.new_node is not None:
8912
        fn = self._ExecDrbd8Secondary
8913
      else:
8914
        fn = self._ExecDrbd8DiskOnly
8915

    
8916
      result = fn(feedback_fn)
8917
    finally:
8918
      # Deactivate the instance disks if we're replacing them on a
8919
      # down instance
8920
      if activate_disks:
8921
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8922

    
8923
    if __debug__:
8924
      # Verify owned locks
8925
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
8926
      nodes = frozenset(self.node_secondary_ip)
8927
      assert ((self.early_release and not owned_locks) or
8928
              (not self.early_release and not (set(owned_locks) - nodes))), \
8929
        ("Not owning the correct locks, early_release=%s, owned=%r,"
8930
         " nodes=%r" % (self.early_release, owned_locks, nodes))
8931

    
8932
    return result
8933

    
8934
  def _CheckVolumeGroup(self, nodes):
8935
    self.lu.LogInfo("Checking volume groups")
8936

    
8937
    vgname = self.cfg.GetVGName()
8938

    
8939
    # Make sure volume group exists on all involved nodes
8940
    results = self.rpc.call_vg_list(nodes)
8941
    if not results:
8942
      raise errors.OpExecError("Can't list volume groups on the nodes")
8943

    
8944
    for node in nodes:
8945
      res = results[node]
8946
      res.Raise("Error checking node %s" % node)
8947
      if vgname not in res.payload:
8948
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8949
                                 (vgname, node))
8950

    
8951
  def _CheckDisksExistence(self, nodes):
8952
    # Check disk existence
8953
    for idx, dev in enumerate(self.instance.disks):
8954
      if idx not in self.disks:
8955
        continue
8956

    
8957
      for node in nodes:
8958
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8959
        self.cfg.SetDiskID(dev, node)
8960

    
8961
        result = self.rpc.call_blockdev_find(node, dev)
8962

    
8963
        msg = result.fail_msg
8964
        if msg or not result.payload:
8965
          if not msg:
8966
            msg = "disk not found"
8967
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8968
                                   (idx, node, msg))
8969

    
8970
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8971
    for idx, dev in enumerate(self.instance.disks):
8972
      if idx not in self.disks:
8973
        continue
8974

    
8975
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8976
                      (idx, node_name))
8977

    
8978
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8979
                                   ldisk=ldisk):
8980
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8981
                                 " replace disks for instance %s" %
8982
                                 (node_name, self.instance.name))
8983

    
8984
  def _CreateNewStorage(self, node_name):
8985
    iv_names = {}
8986

    
8987
    for idx, dev in enumerate(self.instance.disks):
8988
      if idx not in self.disks:
8989
        continue
8990

    
8991
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8992

    
8993
      self.cfg.SetDiskID(dev, node_name)
8994

    
8995
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8996
      names = _GenerateUniqueNames(self.lu, lv_names)
8997

    
8998
      vg_data = dev.children[0].logical_id[0]
8999
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9000
                             logical_id=(vg_data, names[0]))
9001
      vg_meta = dev.children[1].logical_id[0]
9002
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9003
                             logical_id=(vg_meta, names[1]))
9004

    
9005
      new_lvs = [lv_data, lv_meta]
9006
      old_lvs = dev.children
9007
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9008

    
9009
      # we pass force_create=True to force the LVM creation
9010
      for new_lv in new_lvs:
9011
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9012
                        _GetInstanceInfoText(self.instance), False)
9013

    
9014
    return iv_names
9015

    
9016
  def _CheckDevices(self, node_name, iv_names):
9017
    for name, (dev, _, _) in iv_names.iteritems():
9018
      self.cfg.SetDiskID(dev, node_name)
9019

    
9020
      result = self.rpc.call_blockdev_find(node_name, dev)
9021

    
9022
      msg = result.fail_msg
9023
      if msg or not result.payload:
9024
        if not msg:
9025
          msg = "disk not found"
9026
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9027
                                 (name, msg))
9028

    
9029
      if result.payload.is_degraded:
9030
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9031

    
9032
  def _RemoveOldStorage(self, node_name, iv_names):
9033
    for name, (_, old_lvs, _) in iv_names.iteritems():
9034
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9035

    
9036
      for lv in old_lvs:
9037
        self.cfg.SetDiskID(lv, node_name)
9038

    
9039
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9040
        if msg:
9041
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9042
                             hint="remove unused LVs manually")
9043

    
9044
  def _ExecDrbd8DiskOnly(self, feedback_fn):
9045
    """Replace a disk on the primary or secondary for DRBD 8.
9046

9047
    The algorithm for replace is quite complicated:
9048

9049
      1. for each disk to be replaced:
9050

9051
        1. create new LVs on the target node with unique names
9052
        1. detach old LVs from the drbd device
9053
        1. rename old LVs to name_replaced.<time_t>
9054
        1. rename new LVs to old LVs
9055
        1. attach the new LVs (with the old names now) to the drbd device
9056

9057
      1. wait for sync across all devices
9058

9059
      1. for each modified disk:
9060

9061
        1. remove old LVs (which have the name name_replaces.<time_t>)
9062

9063
    Failures are not very well handled.
9064

9065
    """
9066
    steps_total = 6
9067

    
9068
    # Step: check device activation
9069
    self.lu.LogStep(1, steps_total, "Check device existence")
9070
    self._CheckDisksExistence([self.other_node, self.target_node])
9071
    self._CheckVolumeGroup([self.target_node, self.other_node])
9072

    
9073
    # Step: check other node consistency
9074
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9075
    self._CheckDisksConsistency(self.other_node,
9076
                                self.other_node == self.instance.primary_node,
9077
                                False)
9078

    
9079
    # Step: create new storage
9080
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9081
    iv_names = self._CreateNewStorage(self.target_node)
9082

    
9083
    # Step: for each lv, detach+rename*2+attach
9084
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9085
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9086
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9087

    
9088
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9089
                                                     old_lvs)
9090
      result.Raise("Can't detach drbd from local storage on node"
9091
                   " %s for device %s" % (self.target_node, dev.iv_name))
9092
      #dev.children = []
9093
      #cfg.Update(instance)
9094

    
9095
      # ok, we created the new LVs, so now we know we have the needed
9096
      # storage; as such, we proceed on the target node to rename
9097
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9098
      # using the assumption that logical_id == physical_id (which in
9099
      # turn is the unique_id on that node)
9100

    
9101
      # FIXME(iustin): use a better name for the replaced LVs
9102
      temp_suffix = int(time.time())
9103
      ren_fn = lambda d, suff: (d.physical_id[0],
9104
                                d.physical_id[1] + "_replaced-%s" % suff)
9105

    
9106
      # Build the rename list based on what LVs exist on the node
9107
      rename_old_to_new = []
9108
      for to_ren in old_lvs:
9109
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9110
        if not result.fail_msg and result.payload:
9111
          # device exists
9112
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9113

    
9114
      self.lu.LogInfo("Renaming the old LVs on the target node")
9115
      result = self.rpc.call_blockdev_rename(self.target_node,
9116
                                             rename_old_to_new)
9117
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9118

    
9119
      # Now we rename the new LVs to the old LVs
9120
      self.lu.LogInfo("Renaming the new LVs on the target node")
9121
      rename_new_to_old = [(new, old.physical_id)
9122
                           for old, new in zip(old_lvs, new_lvs)]
9123
      result = self.rpc.call_blockdev_rename(self.target_node,
9124
                                             rename_new_to_old)
9125
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9126

    
9127
      for old, new in zip(old_lvs, new_lvs):
9128
        new.logical_id = old.logical_id
9129
        self.cfg.SetDiskID(new, self.target_node)
9130

    
9131
      for disk in old_lvs:
9132
        disk.logical_id = ren_fn(disk, temp_suffix)
9133
        self.cfg.SetDiskID(disk, self.target_node)
9134

    
9135
      # Now that the new lvs have the old name, we can add them to the device
9136
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9137
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9138
                                                  new_lvs)
9139
      msg = result.fail_msg
9140
      if msg:
9141
        for new_lv in new_lvs:
9142
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9143
                                               new_lv).fail_msg
9144
          if msg2:
9145
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9146
                               hint=("cleanup manually the unused logical"
9147
                                     "volumes"))
9148
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9149

    
9150
      dev.children = new_lvs
9151

    
9152
      self.cfg.Update(self.instance, feedback_fn)
9153

    
9154
    cstep = 5
9155
    if self.early_release:
9156
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9157
      cstep += 1
9158
      self._RemoveOldStorage(self.target_node, iv_names)
9159
      # WARNING: we release both node locks here, do not do other RPCs
9160
      # than WaitForSync to the primary node
9161
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9162
                    names=[self.target_node, self.other_node])
9163

    
9164
    # Wait for sync
9165
    # This can fail as the old devices are degraded and _WaitForSync
9166
    # does a combined result over all disks, so we don't check its return value
9167
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9168
    cstep += 1
9169
    _WaitForSync(self.lu, self.instance)
9170

    
9171
    # Check all devices manually
9172
    self._CheckDevices(self.instance.primary_node, iv_names)
9173

    
9174
    # Step: remove old storage
9175
    if not self.early_release:
9176
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9177
      cstep += 1
9178
      self._RemoveOldStorage(self.target_node, iv_names)
9179

    
9180
  def _ExecDrbd8Secondary(self, feedback_fn):
9181
    """Replace the secondary node for DRBD 8.
9182

9183
    The algorithm for replace is quite complicated:
9184
      - for all disks of the instance:
9185
        - create new LVs on the new node with same names
9186
        - shutdown the drbd device on the old secondary
9187
        - disconnect the drbd network on the primary
9188
        - create the drbd device on the new secondary
9189
        - network attach the drbd on the primary, using an artifice:
9190
          the drbd code for Attach() will connect to the network if it
9191
          finds a device which is connected to the good local disks but
9192
          not network enabled
9193
      - wait for sync across all devices
9194
      - remove all disks from the old secondary
9195

9196
    Failures are not very well handled.
9197

9198
    """
9199
    steps_total = 6
9200

    
9201
    # Step: check device activation
9202
    self.lu.LogStep(1, steps_total, "Check device existence")
9203
    self._CheckDisksExistence([self.instance.primary_node])
9204
    self._CheckVolumeGroup([self.instance.primary_node])
9205

    
9206
    # Step: check other node consistency
9207
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9208
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9209

    
9210
    # Step: create new storage
9211
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9212
    for idx, dev in enumerate(self.instance.disks):
9213
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9214
                      (self.new_node, idx))
9215
      # we pass force_create=True to force LVM creation
9216
      for new_lv in dev.children:
9217
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9218
                        _GetInstanceInfoText(self.instance), False)
9219

    
9220
    # Step 4: dbrd minors and drbd setups changes
9221
    # after this, we must manually remove the drbd minors on both the
9222
    # error and the success paths
9223
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9224
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9225
                                         for dev in self.instance.disks],
9226
                                        self.instance.name)
9227
    logging.debug("Allocated minors %r", minors)
9228

    
9229
    iv_names = {}
9230
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9231
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9232
                      (self.new_node, idx))
9233
      # create new devices on new_node; note that we create two IDs:
9234
      # one without port, so the drbd will be activated without
9235
      # networking information on the new node at this stage, and one
9236
      # with network, for the latter activation in step 4
9237
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9238
      if self.instance.primary_node == o_node1:
9239
        p_minor = o_minor1
9240
      else:
9241
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9242
        p_minor = o_minor2
9243

    
9244
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9245
                      p_minor, new_minor, o_secret)
9246
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9247
                    p_minor, new_minor, o_secret)
9248

    
9249
      iv_names[idx] = (dev, dev.children, new_net_id)
9250
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9251
                    new_net_id)
9252
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9253
                              logical_id=new_alone_id,
9254
                              children=dev.children,
9255
                              size=dev.size)
9256
      try:
9257
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9258
                              _GetInstanceInfoText(self.instance), False)
9259
      except errors.GenericError:
9260
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9261
        raise
9262

    
9263
    # We have new devices, shutdown the drbd on the old secondary
9264
    for idx, dev in enumerate(self.instance.disks):
9265
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9266
      self.cfg.SetDiskID(dev, self.target_node)
9267
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9268
      if msg:
9269
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9270
                           "node: %s" % (idx, msg),
9271
                           hint=("Please cleanup this device manually as"
9272
                                 " soon as possible"))
9273

    
9274
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9275
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9276
                                               self.node_secondary_ip,
9277
                                               self.instance.disks)\
9278
                                              [self.instance.primary_node]
9279

    
9280
    msg = result.fail_msg
9281
    if msg:
9282
      # detaches didn't succeed (unlikely)
9283
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9284
      raise errors.OpExecError("Can't detach the disks from the network on"
9285
                               " old node: %s" % (msg,))
9286

    
9287
    # if we managed to detach at least one, we update all the disks of
9288
    # the instance to point to the new secondary
9289
    self.lu.LogInfo("Updating instance configuration")
9290
    for dev, _, new_logical_id in iv_names.itervalues():
9291
      dev.logical_id = new_logical_id
9292
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9293

    
9294
    self.cfg.Update(self.instance, feedback_fn)
9295

    
9296
    # and now perform the drbd attach
9297
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9298
                    " (standalone => connected)")
9299
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9300
                                            self.new_node],
9301
                                           self.node_secondary_ip,
9302
                                           self.instance.disks,
9303
                                           self.instance.name,
9304
                                           False)
9305
    for to_node, to_result in result.items():
9306
      msg = to_result.fail_msg
9307
      if msg:
9308
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9309
                           to_node, msg,
9310
                           hint=("please do a gnt-instance info to see the"
9311
                                 " status of disks"))
9312
    cstep = 5
9313
    if self.early_release:
9314
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9315
      cstep += 1
9316
      self._RemoveOldStorage(self.target_node, iv_names)
9317
      # WARNING: we release all node locks here, do not do other RPCs
9318
      # than WaitForSync to the primary node
9319
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9320
                    names=[self.instance.primary_node,
9321
                           self.target_node,
9322
                           self.new_node])
9323

    
9324
    # Wait for sync
9325
    # This can fail as the old devices are degraded and _WaitForSync
9326
    # does a combined result over all disks, so we don't check its return value
9327
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9328
    cstep += 1
9329
    _WaitForSync(self.lu, self.instance)
9330

    
9331
    # Check all devices manually
9332
    self._CheckDevices(self.instance.primary_node, iv_names)
9333

    
9334
    # Step: remove old storage
9335
    if not self.early_release:
9336
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9337
      self._RemoveOldStorage(self.target_node, iv_names)
9338

    
9339

    
9340
class LURepairNodeStorage(NoHooksLU):
9341
  """Repairs the volume group on a node.
9342

9343
  """
9344
  REQ_BGL = False
9345

    
9346
  def CheckArguments(self):
9347
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9348

    
9349
    storage_type = self.op.storage_type
9350

    
9351
    if (constants.SO_FIX_CONSISTENCY not in
9352
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9353
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9354
                                 " repaired" % storage_type,
9355
                                 errors.ECODE_INVAL)
9356

    
9357
  def ExpandNames(self):
9358
    self.needed_locks = {
9359
      locking.LEVEL_NODE: [self.op.node_name],
9360
      }
9361

    
9362
  def _CheckFaultyDisks(self, instance, node_name):
9363
    """Ensure faulty disks abort the opcode or at least warn."""
9364
    try:
9365
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9366
                                  node_name, True):
9367
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9368
                                   " node '%s'" % (instance.name, node_name),
9369
                                   errors.ECODE_STATE)
9370
    except errors.OpPrereqError, err:
9371
      if self.op.ignore_consistency:
9372
        self.proc.LogWarning(str(err.args[0]))
9373
      else:
9374
        raise
9375

    
9376
  def CheckPrereq(self):
9377
    """Check prerequisites.
9378

9379
    """
9380
    # Check whether any instance on this node has faulty disks
9381
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9382
      if not inst.admin_up:
9383
        continue
9384
      check_nodes = set(inst.all_nodes)
9385
      check_nodes.discard(self.op.node_name)
9386
      for inst_node_name in check_nodes:
9387
        self._CheckFaultyDisks(inst, inst_node_name)
9388

    
9389
  def Exec(self, feedback_fn):
9390
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9391
                (self.op.name, self.op.node_name))
9392

    
9393
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9394
    result = self.rpc.call_storage_execute(self.op.node_name,
9395
                                           self.op.storage_type, st_args,
9396
                                           self.op.name,
9397
                                           constants.SO_FIX_CONSISTENCY)
9398
    result.Raise("Failed to repair storage unit '%s' on %s" %
9399
                 (self.op.name, self.op.node_name))
9400

    
9401

    
9402
class LUNodeEvacStrategy(NoHooksLU):
9403
  """Computes the node evacuation strategy.
9404

9405
  """
9406
  REQ_BGL = False
9407

    
9408
  def CheckArguments(self):
9409
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9410

    
9411
  def ExpandNames(self):
9412
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9413
    self.needed_locks = locks = {}
9414
    if self.op.remote_node is None:
9415
      locks[locking.LEVEL_NODE] = locking.ALL_SET
9416
    else:
9417
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9418
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9419

    
9420
  def Exec(self, feedback_fn):
9421
    if self.op.remote_node is not None:
9422
      instances = []
9423
      for node in self.op.nodes:
9424
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9425
      result = []
9426
      for i in instances:
9427
        if i.primary_node == self.op.remote_node:
9428
          raise errors.OpPrereqError("Node %s is the primary node of"
9429
                                     " instance %s, cannot use it as"
9430
                                     " secondary" %
9431
                                     (self.op.remote_node, i.name),
9432
                                     errors.ECODE_INVAL)
9433
        result.append([i.name, self.op.remote_node])
9434
    else:
9435
      ial = IAllocator(self.cfg, self.rpc,
9436
                       mode=constants.IALLOCATOR_MODE_MEVAC,
9437
                       evac_nodes=self.op.nodes)
9438
      ial.Run(self.op.iallocator, validate=True)
9439
      if not ial.success:
9440
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9441
                                 errors.ECODE_NORES)
9442
      result = ial.result
9443
    return result
9444

    
9445

    
9446
class LUInstanceGrowDisk(LogicalUnit):
9447
  """Grow a disk of an instance.
9448

9449
  """
9450
  HPATH = "disk-grow"
9451
  HTYPE = constants.HTYPE_INSTANCE
9452
  REQ_BGL = False
9453

    
9454
  def ExpandNames(self):
9455
    self._ExpandAndLockInstance()
9456
    self.needed_locks[locking.LEVEL_NODE] = []
9457
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9458

    
9459
  def DeclareLocks(self, level):
9460
    if level == locking.LEVEL_NODE:
9461
      self._LockInstancesNodes()
9462

    
9463
  def BuildHooksEnv(self):
9464
    """Build hooks env.
9465

9466
    This runs on the master, the primary and all the secondaries.
9467

9468
    """
9469
    env = {
9470
      "DISK": self.op.disk,
9471
      "AMOUNT": self.op.amount,
9472
      }
9473
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9474
    return env
9475

    
9476
  def BuildHooksNodes(self):
9477
    """Build hooks nodes.
9478

9479
    """
9480
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9481
    return (nl, nl)
9482

    
9483
  def CheckPrereq(self):
9484
    """Check prerequisites.
9485

9486
    This checks that the instance is in the cluster.
9487

9488
    """
9489
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9490
    assert instance is not None, \
9491
      "Cannot retrieve locked instance %s" % self.op.instance_name
9492
    nodenames = list(instance.all_nodes)
9493
    for node in nodenames:
9494
      _CheckNodeOnline(self, node)
9495

    
9496
    self.instance = instance
9497

    
9498
    if instance.disk_template not in constants.DTS_GROWABLE:
9499
      raise errors.OpPrereqError("Instance's disk layout does not support"
9500
                                 " growing", errors.ECODE_INVAL)
9501

    
9502
    self.disk = instance.FindDisk(self.op.disk)
9503

    
9504
    if instance.disk_template not in (constants.DT_FILE,
9505
                                      constants.DT_SHARED_FILE):
9506
      # TODO: check the free disk space for file, when that feature will be
9507
      # supported
9508
      _CheckNodesFreeDiskPerVG(self, nodenames,
9509
                               self.disk.ComputeGrowth(self.op.amount))
9510

    
9511
  def Exec(self, feedback_fn):
9512
    """Execute disk grow.
9513

9514
    """
9515
    instance = self.instance
9516
    disk = self.disk
9517

    
9518
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9519
    if not disks_ok:
9520
      raise errors.OpExecError("Cannot activate block device to grow")
9521

    
9522
    for node in instance.all_nodes:
9523
      self.cfg.SetDiskID(disk, node)
9524
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9525
      result.Raise("Grow request failed to node %s" % node)
9526

    
9527
      # TODO: Rewrite code to work properly
9528
      # DRBD goes into sync mode for a short amount of time after executing the
9529
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9530
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9531
      # time is a work-around.
9532
      time.sleep(5)
9533

    
9534
    disk.RecordGrow(self.op.amount)
9535
    self.cfg.Update(instance, feedback_fn)
9536
    if self.op.wait_for_sync:
9537
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9538
      if disk_abort:
9539
        self.proc.LogWarning("Disk sync-ing has not returned a good"
9540
                             " status; please check the instance")
9541
      if not instance.admin_up:
9542
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9543
    elif not instance.admin_up:
9544
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9545
                           " not supposed to be running because no wait for"
9546
                           " sync mode was requested")
9547

    
9548

    
9549
class LUInstanceQueryData(NoHooksLU):
9550
  """Query runtime instance data.
9551

9552
  """
9553
  REQ_BGL = False
9554

    
9555
  def ExpandNames(self):
9556
    self.needed_locks = {}
9557

    
9558
    # Use locking if requested or when non-static information is wanted
9559
    if not (self.op.static or self.op.use_locking):
9560
      self.LogWarning("Non-static data requested, locks need to be acquired")
9561
      self.op.use_locking = True
9562

    
9563
    if self.op.instances or not self.op.use_locking:
9564
      # Expand instance names right here
9565
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
9566
    else:
9567
      # Will use acquired locks
9568
      self.wanted_names = None
9569

    
9570
    if self.op.use_locking:
9571
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9572

    
9573
      if self.wanted_names is None:
9574
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9575
      else:
9576
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9577

    
9578
      self.needed_locks[locking.LEVEL_NODE] = []
9579
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9580
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9581

    
9582
  def DeclareLocks(self, level):
9583
    if self.op.use_locking and level == locking.LEVEL_NODE:
9584
      self._LockInstancesNodes()
9585

    
9586
  def CheckPrereq(self):
9587
    """Check prerequisites.
9588

9589
    This only checks the optional instance list against the existing names.
9590

9591
    """
9592
    if self.wanted_names is None:
9593
      assert self.op.use_locking, "Locking was not used"
9594
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9595

    
9596
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9597
                             for name in self.wanted_names]
9598

    
9599
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9600
    """Returns the status of a block device
9601

9602
    """
9603
    if self.op.static or not node:
9604
      return None
9605

    
9606
    self.cfg.SetDiskID(dev, node)
9607

    
9608
    result = self.rpc.call_blockdev_find(node, dev)
9609
    if result.offline:
9610
      return None
9611

    
9612
    result.Raise("Can't compute disk status for %s" % instance_name)
9613

    
9614
    status = result.payload
9615
    if status is None:
9616
      return None
9617

    
9618
    return (status.dev_path, status.major, status.minor,
9619
            status.sync_percent, status.estimated_time,
9620
            status.is_degraded, status.ldisk_status)
9621

    
9622
  def _ComputeDiskStatus(self, instance, snode, dev):
9623
    """Compute block device status.
9624

9625
    """
9626
    if dev.dev_type in constants.LDS_DRBD:
9627
      # we change the snode then (otherwise we use the one passed in)
9628
      if dev.logical_id[0] == instance.primary_node:
9629
        snode = dev.logical_id[1]
9630
      else:
9631
        snode = dev.logical_id[0]
9632

    
9633
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9634
                                              instance.name, dev)
9635
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9636

    
9637
    if dev.children:
9638
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9639
                      for child in dev.children]
9640
    else:
9641
      dev_children = []
9642

    
9643
    return {
9644
      "iv_name": dev.iv_name,
9645
      "dev_type": dev.dev_type,
9646
      "logical_id": dev.logical_id,
9647
      "physical_id": dev.physical_id,
9648
      "pstatus": dev_pstatus,
9649
      "sstatus": dev_sstatus,
9650
      "children": dev_children,
9651
      "mode": dev.mode,
9652
      "size": dev.size,
9653
      }
9654

    
9655
  def Exec(self, feedback_fn):
9656
    """Gather and return data"""
9657
    result = {}
9658

    
9659
    cluster = self.cfg.GetClusterInfo()
9660

    
9661
    for instance in self.wanted_instances:
9662
      if not self.op.static:
9663
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9664
                                                  instance.name,
9665
                                                  instance.hypervisor)
9666
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9667
        remote_info = remote_info.payload
9668
        if remote_info and "state" in remote_info:
9669
          remote_state = "up"
9670
        else:
9671
          remote_state = "down"
9672
      else:
9673
        remote_state = None
9674
      if instance.admin_up:
9675
        config_state = "up"
9676
      else:
9677
        config_state = "down"
9678

    
9679
      disks = [self._ComputeDiskStatus(instance, None, device)
9680
               for device in instance.disks]
9681

    
9682
      result[instance.name] = {
9683
        "name": instance.name,
9684
        "config_state": config_state,
9685
        "run_state": remote_state,
9686
        "pnode": instance.primary_node,
9687
        "snodes": instance.secondary_nodes,
9688
        "os": instance.os,
9689
        # this happens to be the same format used for hooks
9690
        "nics": _NICListToTuple(self, instance.nics),
9691
        "disk_template": instance.disk_template,
9692
        "disks": disks,
9693
        "hypervisor": instance.hypervisor,
9694
        "network_port": instance.network_port,
9695
        "hv_instance": instance.hvparams,
9696
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9697
        "be_instance": instance.beparams,
9698
        "be_actual": cluster.FillBE(instance),
9699
        "os_instance": instance.osparams,
9700
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9701
        "serial_no": instance.serial_no,
9702
        "mtime": instance.mtime,
9703
        "ctime": instance.ctime,
9704
        "uuid": instance.uuid,
9705
        }
9706

    
9707
    return result
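# Editor's note: illustrative sketch, not part of cmdlib.py.  It shows how a
# caller might walk the mapping returned by LUInstanceQueryData.Exec above and
# unpack the 7-tuple built by _ComputeBlockdevStatus.  The sample data and the
# helper name are invented for the example.
def _PrintInstanceDiskStatus(query_result):
  """Prints a one-line sync summary for every disk of every instance."""
  for iname in sorted(query_result):
    info = query_result[iname]
    print("%s: config=%s run=%s" % (iname, info["config_state"],
                                    info["run_state"]))
    for disk in info["disks"]:
      pstatus = disk["pstatus"]
      if pstatus is None:
        print("  %s: no status (static query or offline node)" %
              disk["iv_name"])
        continue
      (dev_path, _major, _minor, sync_percent, _est_time,
       is_degraded, _ldisk_status) = pstatus
      print("  %s at %s: sync=%s%% degraded=%s" %
            (disk["iv_name"], dev_path, sync_percent, is_degraded))


# Example input, e.g. _PrintInstanceDiskStatus(_EXAMPLE_QUERY_RESULT):
_EXAMPLE_QUERY_RESULT = {
  "instance1.example.com": {
    "config_state": "up",
    "run_state": "up",
    "disks": [{
      "iv_name": "disk/0",
      "pstatus": ("/dev/drbd0", 147, 0, 100.0, 0, False, None),
      }],
    },
  }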
9708

    
9709

    
9710
class LUInstanceSetParams(LogicalUnit):
9711
  """Modifies an instances's parameters.
9712

9713
  """
9714
  HPATH = "instance-modify"
9715
  HTYPE = constants.HTYPE_INSTANCE
9716
  REQ_BGL = False
9717

    
9718
  def CheckArguments(self):
9719
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9720
            self.op.hvparams or self.op.beparams or self.op.os_name):
9721
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9722

    
9723
    if self.op.hvparams:
9724
      _CheckGlobalHvParams(self.op.hvparams)
9725

    
9726
    # Disk validation
9727
    disk_addremove = 0
9728
    for disk_op, disk_dict in self.op.disks:
9729
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9730
      if disk_op == constants.DDM_REMOVE:
9731
        disk_addremove += 1
9732
        continue
9733
      elif disk_op == constants.DDM_ADD:
9734
        disk_addremove += 1
9735
      else:
9736
        if not isinstance(disk_op, int):
9737
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9738
        if not isinstance(disk_dict, dict):
9739
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9740
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9741

    
9742
      if disk_op == constants.DDM_ADD:
9743
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9744
        if mode not in constants.DISK_ACCESS_SET:
9745
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9746
                                     errors.ECODE_INVAL)
9747
        size = disk_dict.get(constants.IDISK_SIZE, None)
9748
        if size is None:
9749
          raise errors.OpPrereqError("Required disk parameter size missing",
9750
                                     errors.ECODE_INVAL)
9751
        try:
9752
          size = int(size)
9753
        except (TypeError, ValueError), err:
9754
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9755
                                     str(err), errors.ECODE_INVAL)
9756
        disk_dict[constants.IDISK_SIZE] = size
9757
      else:
9758
        # modification of disk
9759
        if constants.IDISK_SIZE in disk_dict:
9760
          raise errors.OpPrereqError("Disk size change not possible, use"
9761
                                     " grow-disk", errors.ECODE_INVAL)
9762

    
9763
    if disk_addremove > 1:
9764
      raise errors.OpPrereqError("Only one disk add or remove operation"
9765
                                 " supported at a time", errors.ECODE_INVAL)
9766

    
9767
    if self.op.disks and self.op.disk_template is not None:
9768
      raise errors.OpPrereqError("Disk template conversion and other disk"
9769
                                 " changes not supported at the same time",
9770
                                 errors.ECODE_INVAL)
9771

    
9772
    if (self.op.disk_template and
9773
        self.op.disk_template in constants.DTS_INT_MIRROR and
9774
        self.op.remote_node is None):
9775
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9776
                                 " one requires specifying a secondary node",
9777
                                 errors.ECODE_INVAL)
9778

    
9779
    # NIC validation
9780
    nic_addremove = 0
9781
    for nic_op, nic_dict in self.op.nics:
9782
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9783
      if nic_op == constants.DDM_REMOVE:
9784
        nic_addremove += 1
9785
        continue
9786
      elif nic_op == constants.DDM_ADD:
9787
        nic_addremove += 1
9788
      else:
9789
        if not isinstance(nic_op, int):
9790
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9791
        if not isinstance(nic_dict, dict):
9792
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9793
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9794

    
9795
      # nic_dict should be a dict
9796
      nic_ip = nic_dict.get(constants.INIC_IP, None)
9797
      if nic_ip is not None:
9798
        if nic_ip.lower() == constants.VALUE_NONE:
9799
          nic_dict[constants.INIC_IP] = None
9800
        else:
9801
          if not netutils.IPAddress.IsValid(nic_ip):
9802
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9803
                                       errors.ECODE_INVAL)
9804

    
9805
      nic_bridge = nic_dict.get('bridge', None)
9806
      nic_link = nic_dict.get(constants.INIC_LINK, None)
9807
      if nic_bridge and nic_link:
9808
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9809
                                   " at the same time", errors.ECODE_INVAL)
9810
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9811
        nic_dict['bridge'] = None
9812
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9813
        nic_dict[constants.INIC_LINK] = None
9814

    
9815
      if nic_op == constants.DDM_ADD:
9816
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
9817
        if nic_mac is None:
9818
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9819

    
9820
      if constants.INIC_MAC in nic_dict:
9821
        nic_mac = nic_dict[constants.INIC_MAC]
9822
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9823
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9824

    
9825
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9826
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9827
                                     " modifying an existing nic",
9828
                                     errors.ECODE_INVAL)
9829

    
9830
    if nic_addremove > 1:
9831
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9832
                                 " supported at a time", errors.ECODE_INVAL)
9833

    
9834
  def ExpandNames(self):
9835
    self._ExpandAndLockInstance()
9836
    self.needed_locks[locking.LEVEL_NODE] = []
9837
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9838

    
9839
  def DeclareLocks(self, level):
9840
    if level == locking.LEVEL_NODE:
9841
      self._LockInstancesNodes()
9842
      if self.op.disk_template and self.op.remote_node:
9843
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9844
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9845

    
9846
  def BuildHooksEnv(self):
9847
    """Build hooks env.
9848

9849
    This runs on the master, primary and secondaries.
9850

9851
    """
9852
    args = dict()
9853
    if constants.BE_MEMORY in self.be_new:
9854
      args['memory'] = self.be_new[constants.BE_MEMORY]
9855
    if constants.BE_VCPUS in self.be_new:
9856
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9857
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9858
    # information at all.
9859
    if self.op.nics:
9860
      args['nics'] = []
9861
      nic_override = dict(self.op.nics)
9862
      for idx, nic in enumerate(self.instance.nics):
9863
        if idx in nic_override:
9864
          this_nic_override = nic_override[idx]
9865
        else:
9866
          this_nic_override = {}
9867
        if constants.INIC_IP in this_nic_override:
9868
          ip = this_nic_override[constants.INIC_IP]
9869
        else:
9870
          ip = nic.ip
9871
        if constants.INIC_MAC in this_nic_override:
9872
          mac = this_nic_override[constants.INIC_MAC]
9873
        else:
9874
          mac = nic.mac
9875
        if idx in self.nic_pnew:
9876
          nicparams = self.nic_pnew[idx]
9877
        else:
9878
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9879
        mode = nicparams[constants.NIC_MODE]
9880
        link = nicparams[constants.NIC_LINK]
9881
        args['nics'].append((ip, mac, mode, link))
9882
      if constants.DDM_ADD in nic_override:
9883
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9884
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9885
        nicparams = self.nic_pnew[constants.DDM_ADD]
9886
        mode = nicparams[constants.NIC_MODE]
9887
        link = nicparams[constants.NIC_LINK]
9888
        args['nics'].append((ip, mac, mode, link))
9889
      elif constants.DDM_REMOVE in nic_override:
9890
        del args['nics'][-1]
9891

    
9892
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9893
    if self.op.disk_template:
9894
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9895

    
9896
    return env
9897

    
9898
  def BuildHooksNodes(self):
9899
    """Build hooks nodes.
9900

9901
    """
9902
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9903
    return (nl, nl)
9904

    
9905
  def CheckPrereq(self):
9906
    """Check prerequisites.
9907

9908
    This only checks the instance list against the existing names.
9909

9910
    """
9911
    # checking the new params on the primary/secondary nodes
9912

    
9913
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9914
    cluster = self.cluster = self.cfg.GetClusterInfo()
9915
    assert self.instance is not None, \
9916
      "Cannot retrieve locked instance %s" % self.op.instance_name
9917
    pnode = instance.primary_node
9918
    nodelist = list(instance.all_nodes)
9919

    
9920
    # OS change
9921
    if self.op.os_name and not self.op.force:
9922
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9923
                      self.op.force_variant)
9924
      instance_os = self.op.os_name
9925
    else:
9926
      instance_os = instance.os
9927

    
9928
    if self.op.disk_template:
9929
      if instance.disk_template == self.op.disk_template:
9930
        raise errors.OpPrereqError("Instance already has disk template %s" %
9931
                                   instance.disk_template, errors.ECODE_INVAL)
9932

    
9933
      if (instance.disk_template,
9934
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9935
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9936
                                   " %s to %s" % (instance.disk_template,
9937
                                                  self.op.disk_template),
9938
                                   errors.ECODE_INVAL)
9939
      _CheckInstanceDown(self, instance, "cannot change disk template")
9940
      if self.op.disk_template in constants.DTS_INT_MIRROR:
9941
        if self.op.remote_node == pnode:
9942
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9943
                                     " as the primary node of the instance" %
9944
                                     self.op.remote_node, errors.ECODE_STATE)
9945
        _CheckNodeOnline(self, self.op.remote_node)
9946
        _CheckNodeNotDrained(self, self.op.remote_node)
9947
        # FIXME: here we assume that the old instance type is DT_PLAIN
9948
        assert instance.disk_template == constants.DT_PLAIN
9949
        disks = [{constants.IDISK_SIZE: d.size,
9950
                  constants.IDISK_VG: d.logical_id[0]}
9951
                 for d in instance.disks]
9952
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9953
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9954

    
9955
    # hvparams processing
9956
    if self.op.hvparams:
9957
      hv_type = instance.hypervisor
9958
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9959
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9960
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9961

    
9962
      # local check
9963
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9964
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9965
      self.hv_new = hv_new # the new actual values
9966
      self.hv_inst = i_hvdict # the new dict (without defaults)
9967
    else:
9968
      self.hv_new = self.hv_inst = {}
9969

    
9970
    # beparams processing
9971
    if self.op.beparams:
9972
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9973
                                   use_none=True)
9974
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9975
      be_new = cluster.SimpleFillBE(i_bedict)
9976
      self.be_new = be_new # the new actual values
9977
      self.be_inst = i_bedict # the new dict (without defaults)
9978
    else:
9979
      self.be_new = self.be_inst = {}
9980

    
9981
    # osparams processing
9982
    if self.op.osparams:
9983
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9984
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9985
      self.os_inst = i_osdict # the new dict (without defaults)
9986
    else:
9987
      self.os_inst = {}
9988

    
9989
    self.warn = []
9990

    
9991
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9992
      mem_check_list = [pnode]
9993
      if be_new[constants.BE_AUTO_BALANCE]:
9994
        # either we changed auto_balance to yes or it was from before
9995
        mem_check_list.extend(instance.secondary_nodes)
9996
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9997
                                                  instance.hypervisor)
9998
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9999
                                         instance.hypervisor)
10000
      pninfo = nodeinfo[pnode]
10001
      msg = pninfo.fail_msg
10002
      if msg:
10003
        # Assume the primary node is unreachable and go ahead
10004
        self.warn.append("Can't get info from primary node %s: %s" %
10005
                         (pnode,  msg))
10006
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
10007
        self.warn.append("Node data from primary node %s doesn't contain"
10008
                         " free memory information" % pnode)
10009
      elif instance_info.fail_msg:
10010
        self.warn.append("Can't get instance runtime information: %s" %
10011
                        instance_info.fail_msg)
10012
      else:
10013
        if instance_info.payload:
10014
          current_mem = int(instance_info.payload['memory'])
10015
        else:
10016
          # Assume instance not running
10017
          # (there is a slight race condition here, but it's not very probable,
10018
          # and we have no other way to check)
10019
          current_mem = 0
10020
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10021
                    pninfo.payload['memory_free'])
10022
        if miss_mem > 0:
10023
          raise errors.OpPrereqError("This change will prevent the instance"
10024
                                     " from starting, due to %d MB of memory"
10025
                                     " missing on its primary node" % miss_mem,
10026
                                     errors.ECODE_NORES)
10027

    
10028
      if be_new[constants.BE_AUTO_BALANCE]:
10029
        for node, nres in nodeinfo.items():
10030
          if node not in instance.secondary_nodes:
10031
            continue
10032
          msg = nres.fail_msg
10033
          if msg:
10034
            self.warn.append("Can't get info from secondary node %s: %s" %
10035
                             (node, msg))
10036
          elif not isinstance(nres.payload.get('memory_free', None), int):
10037
            self.warn.append("Secondary node %s didn't return free"
10038
                             " memory information" % node)
10039
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10040
            self.warn.append("Not enough memory to failover instance to"
10041
                             " secondary node %s" % node)
10042

    
10043
    # NIC processing
10044
    self.nic_pnew = {}
10045
    self.nic_pinst = {}
10046
    for nic_op, nic_dict in self.op.nics:
10047
      if nic_op == constants.DDM_REMOVE:
10048
        if not instance.nics:
10049
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10050
                                     errors.ECODE_INVAL)
10051
        continue
10052
      if nic_op != constants.DDM_ADD:
10053
        # an existing nic
10054
        if not instance.nics:
10055
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10056
                                     " no NICs" % nic_op,
10057
                                     errors.ECODE_INVAL)
10058
        if nic_op < 0 or nic_op >= len(instance.nics):
10059
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10060
                                     " are 0 to %d" %
10061
                                     (nic_op, len(instance.nics) - 1),
10062
                                     errors.ECODE_INVAL)
10063
        old_nic_params = instance.nics[nic_op].nicparams
10064
        old_nic_ip = instance.nics[nic_op].ip
10065
      else:
10066
        old_nic_params = {}
10067
        old_nic_ip = None
10068

    
10069
      update_params_dict = dict([(key, nic_dict[key])
10070
                                 for key in constants.NICS_PARAMETERS
10071
                                 if key in nic_dict])
10072

    
10073
      if 'bridge' in nic_dict:
10074
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10075

    
10076
      new_nic_params = _GetUpdatedParams(old_nic_params,
10077
                                         update_params_dict)
10078
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10079
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10080
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10081
      self.nic_pinst[nic_op] = new_nic_params
10082
      self.nic_pnew[nic_op] = new_filled_nic_params
10083
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10084

    
10085
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10086
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10087
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10088
        if msg:
10089
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10090
          if self.op.force:
10091
            self.warn.append(msg)
10092
          else:
10093
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10094
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10095
        if constants.INIC_IP in nic_dict:
10096
          nic_ip = nic_dict[constants.INIC_IP]
10097
        else:
10098
          nic_ip = old_nic_ip
10099
        if nic_ip is None:
10100
          raise errors.OpPrereqError('Cannot set the nic ip to None'
10101
                                     ' on a routed nic', errors.ECODE_INVAL)
10102
      if constants.INIC_MAC in nic_dict:
10103
        nic_mac = nic_dict[constants.INIC_MAC]
10104
        if nic_mac is None:
10105
          raise errors.OpPrereqError('Cannot set the nic mac to None',
10106
                                     errors.ECODE_INVAL)
10107
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10108
          # otherwise generate the mac
10109
          nic_dict[constants.INIC_MAC] = \
10110
            self.cfg.GenerateMAC(self.proc.GetECId())
10111
        else:
10112
          # or validate/reserve the current one
10113
          try:
10114
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10115
          except errors.ReservationError:
10116
            raise errors.OpPrereqError("MAC address %s already in use"
10117
                                       " in cluster" % nic_mac,
10118
                                       errors.ECODE_NOTUNIQUE)
10119

    
10120
    # DISK processing
10121
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10122
      raise errors.OpPrereqError("Disk operations not supported for"
10123
                                 " diskless instances",
10124
                                 errors.ECODE_INVAL)
10125
    for disk_op, _ in self.op.disks:
10126
      if disk_op == constants.DDM_REMOVE:
10127
        if len(instance.disks) == 1:
10128
          raise errors.OpPrereqError("Cannot remove the last disk of"
10129
                                     " an instance", errors.ECODE_INVAL)
10130
        _CheckInstanceDown(self, instance, "cannot remove disks")
10131

    
10132
      if (disk_op == constants.DDM_ADD and
10133
          len(instance.disks) >= constants.MAX_DISKS):
10134
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10135
                                   " add more" % constants.MAX_DISKS,
10136
                                   errors.ECODE_STATE)
10137
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10138
        # an existing disk
10139
        if disk_op < 0 or disk_op >= len(instance.disks):
10140
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10141
                                     " are 0 to %d" %
10142
                                     (disk_op, len(instance.disks) - 1),
10143
                                     errors.ECODE_INVAL)
10144

    
10145
    return
10146

    
10147
  def _ConvertPlainToDrbd(self, feedback_fn):
10148
    """Converts an instance from plain to drbd.
10149

10150
    """
10151
    feedback_fn("Converting template to drbd")
10152
    instance = self.instance
10153
    pnode = instance.primary_node
10154
    snode = self.op.remote_node
10155

    
10156
    # create a fake disk info for _GenerateDiskTemplate
10157
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10158
                  constants.IDISK_VG: d.logical_id[0]}
10159
                 for d in instance.disks]
10160
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10161
                                      instance.name, pnode, [snode],
10162
                                      disk_info, None, None, 0, feedback_fn)
10163
    info = _GetInstanceInfoText(instance)
10164
    feedback_fn("Creating aditional volumes...")
10165
    # first, create the missing data and meta devices
10166
    for disk in new_disks:
10167
      # unfortunately this is... not too nice
10168
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10169
                            info, True)
10170
      for child in disk.children:
10171
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10172
    # at this stage, all new LVs have been created, we can rename the
10173
    # old ones
10174
    feedback_fn("Renaming original volumes...")
10175
    rename_list = [(o, n.children[0].logical_id)
10176
                   for (o, n) in zip(instance.disks, new_disks)]
10177
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10178
    result.Raise("Failed to rename original LVs")
10179

    
10180
    feedback_fn("Initializing DRBD devices...")
10181
    # all child devices are in place, we can now create the DRBD devices
10182
    for disk in new_disks:
10183
      for node in [pnode, snode]:
10184
        f_create = node == pnode
10185
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10186

    
10187
    # at this point, the instance has been modified
10188
    instance.disk_template = constants.DT_DRBD8
10189
    instance.disks = new_disks
10190
    self.cfg.Update(instance, feedback_fn)
10191

    
10192
    # disks are created, waiting for sync
10193
    disk_abort = not _WaitForSync(self, instance)
10194
    if disk_abort:
10195
      raise errors.OpExecError("There are some degraded disks for"
10196
                               " this instance, please cleanup manually")
10197

    
10198
  def _ConvertDrbdToPlain(self, feedback_fn):
10199
    """Converts an instance from drbd to plain.
10200

10201
    """
10202
    instance = self.instance
10203
    assert len(instance.secondary_nodes) == 1
10204
    pnode = instance.primary_node
10205
    snode = instance.secondary_nodes[0]
10206
    feedback_fn("Converting template to plain")
10207

    
10208
    old_disks = instance.disks
10209
    new_disks = [d.children[0] for d in old_disks]
10210

    
10211
    # copy over size and mode
10212
    for parent, child in zip(old_disks, new_disks):
10213
      child.size = parent.size
10214
      child.mode = parent.mode
10215

    
10216
    # update instance structure
10217
    instance.disks = new_disks
10218
    instance.disk_template = constants.DT_PLAIN
10219
    self.cfg.Update(instance, feedback_fn)
10220

    
10221
    feedback_fn("Removing volumes on the secondary node...")
10222
    for disk in old_disks:
10223
      self.cfg.SetDiskID(disk, snode)
10224
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10225
      if msg:
10226
        self.LogWarning("Could not remove block device %s on node %s,"
10227
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10228

    
10229
    feedback_fn("Removing unneeded volumes on the primary node...")
10230
    for idx, disk in enumerate(old_disks):
10231
      meta = disk.children[1]
10232
      self.cfg.SetDiskID(meta, pnode)
10233
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10234
      if msg:
10235
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10236
                        " continuing anyway: %s", idx, pnode, msg)
10237

    
10238
  def Exec(self, feedback_fn):
10239
    """Modifies an instance.
10240

10241
    All parameters take effect only at the next restart of the instance.
10242

10243
    """
10244
    # Process here the warnings from CheckPrereq, as we don't have a
10245
    # feedback_fn there.
10246
    for warn in self.warn:
10247
      feedback_fn("WARNING: %s" % warn)
10248

    
10249
    result = []
10250
    instance = self.instance
10251
    # disk changes
10252
    for disk_op, disk_dict in self.op.disks:
10253
      if disk_op == constants.DDM_REMOVE:
10254
        # remove the last disk
10255
        device = instance.disks.pop()
10256
        device_idx = len(instance.disks)
10257
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10258
          self.cfg.SetDiskID(disk, node)
10259
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10260
          if msg:
10261
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10262
                            " continuing anyway", device_idx, node, msg)
10263
        result.append(("disk/%d" % device_idx, "remove"))
10264
      elif disk_op == constants.DDM_ADD:
10265
        # add a new disk
10266
        if instance.disk_template in (constants.DT_FILE,
10267
                                        constants.DT_SHARED_FILE):
10268
          file_driver, file_path = instance.disks[0].logical_id
10269
          file_path = os.path.dirname(file_path)
10270
        else:
10271
          file_driver = file_path = None
10272
        disk_idx_base = len(instance.disks)
10273
        new_disk = _GenerateDiskTemplate(self,
10274
                                         instance.disk_template,
10275
                                         instance.name, instance.primary_node,
10276
                                         instance.secondary_nodes,
10277
                                         [disk_dict],
10278
                                         file_path,
10279
                                         file_driver,
10280
                                         disk_idx_base, feedback_fn)[0]
10281
        instance.disks.append(new_disk)
10282
        info = _GetInstanceInfoText(instance)
10283

    
10284
        logging.info("Creating volume %s for instance %s",
10285
                     new_disk.iv_name, instance.name)
10286
        # Note: this needs to be kept in sync with _CreateDisks
10287
        #HARDCODE
10288
        for node in instance.all_nodes:
10289
          f_create = node == instance.primary_node
10290
          try:
10291
            _CreateBlockDev(self, node, instance, new_disk,
10292
                            f_create, info, f_create)
10293
          except errors.OpExecError, err:
10294
            self.LogWarning("Failed to create volume %s (%s) on"
10295
                            " node %s: %s",
10296
                            new_disk.iv_name, new_disk, node, err)
10297
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10298
                       (new_disk.size, new_disk.mode)))
10299
      else:
10300
        # change a given disk
10301
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10302
        result.append(("disk.mode/%d" % disk_op,
10303
                       disk_dict[constants.IDISK_MODE]))
10304

    
10305
    if self.op.disk_template:
10306
      r_shut = _ShutdownInstanceDisks(self, instance)
10307
      if not r_shut:
10308
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10309
                                 " proceed with disk template conversion")
10310
      mode = (instance.disk_template, self.op.disk_template)
10311
      try:
10312
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10313
      except:
10314
        self.cfg.ReleaseDRBDMinors(instance.name)
10315
        raise
10316
      result.append(("disk_template", self.op.disk_template))
10317

    
10318
    # NIC changes
10319
    for nic_op, nic_dict in self.op.nics:
10320
      if nic_op == constants.DDM_REMOVE:
10321
        # remove the last nic
10322
        del instance.nics[-1]
10323
        result.append(("nic.%d" % len(instance.nics), "remove"))
10324
      elif nic_op == constants.DDM_ADD:
10325
        # mac and bridge should be set by now
10326
        mac = nic_dict[constants.INIC_MAC]
10327
        ip = nic_dict.get(constants.INIC_IP, None)
10328
        nicparams = self.nic_pinst[constants.DDM_ADD]
10329
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10330
        instance.nics.append(new_nic)
10331
        result.append(("nic.%d" % (len(instance.nics) - 1),
10332
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10333
                       (new_nic.mac, new_nic.ip,
10334
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10335
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10336
                       )))
10337
      else:
10338
        for key in (constants.INIC_MAC, constants.INIC_IP):
10339
          if key in nic_dict:
10340
            setattr(instance.nics[nic_op], key, nic_dict[key])
10341
        if nic_op in self.nic_pinst:
10342
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10343
        for key, val in nic_dict.iteritems():
10344
          result.append(("nic.%s/%d" % (key, nic_op), val))
10345

    
10346
    # hvparams changes
10347
    if self.op.hvparams:
10348
      instance.hvparams = self.hv_inst
10349
      for key, val in self.op.hvparams.iteritems():
10350
        result.append(("hv/%s" % key, val))
10351

    
10352
    # beparams changes
10353
    if self.op.beparams:
10354
      instance.beparams = self.be_inst
10355
      for key, val in self.op.beparams.iteritems():
10356
        result.append(("be/%s" % key, val))
10357

    
10358
    # OS change
10359
    if self.op.os_name:
10360
      instance.os = self.op.os_name
10361

    
10362
    # osparams changes
10363
    if self.op.osparams:
10364
      instance.osparams = self.os_inst
10365
      for key, val in self.op.osparams.iteritems():
10366
        result.append(("os/%s" % key, val))
10367

    
10368
    self.cfg.Update(instance, feedback_fn)
10369

    
10370
    return result
10371

    
10372
  _DISK_CONVERSIONS = {
10373
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10374
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10375
    }
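# Editor's note: illustrative sketch, not part of cmdlib.py.  The
# _DISK_CONVERSIONS table above maps (current template, target template) pairs
# to converter methods, and LUInstanceSetParams.Exec dispatches on
# (instance.disk_template, self.op.disk_template).  The stand-alone example
# below mirrors that dispatch-by-tuple pattern with invented template names.
def _ConvertAToB(feedback_fn):
  feedback_fn("Converting template a to b")


def _ConvertBToA(feedback_fn):
  feedback_fn("Converting template b to a")


_EXAMPLE_CONVERSIONS = {
  ("a", "b"): _ConvertAToB,
  ("b", "a"): _ConvertBToA,
  }


def _RunConversion(current, target, feedback_fn):
  """Dispatches to the converter registered for (current, target)."""
  try:
    fn = _EXAMPLE_CONVERSIONS[(current, target)]
  except KeyError:
    raise ValueError("Unsupported conversion from %s to %s" %
                     (current, target))
  fn(feedback_fn)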
10376

    
10377

    
10378
class LUBackupQuery(NoHooksLU):
10379
  """Query the exports list
10380

10381
  """
10382
  REQ_BGL = False
10383

    
10384
  def ExpandNames(self):
10385
    self.needed_locks = {}
10386
    self.share_locks[locking.LEVEL_NODE] = 1
10387
    if not self.op.nodes:
10388
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10389
    else:
10390
      self.needed_locks[locking.LEVEL_NODE] = \
10391
        _GetWantedNodes(self, self.op.nodes)
10392

    
10393
  def Exec(self, feedback_fn):
10394
    """Compute the list of all the exported system images.
10395

10396
    @rtype: dict
10397
    @return: a dictionary with the structure node->(export-list)
10398
        where export-list is a list of the instances exported on
10399
        that node.
10400

10401
    """
10402
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10403
    rpcresult = self.rpc.call_export_list(self.nodes)
10404
    result = {}
10405
    for node in rpcresult:
10406
      if rpcresult[node].fail_msg:
10407
        result[node] = False
10408
      else:
10409
        result[node] = rpcresult[node].payload
10410

    
10411
    return result
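# Editor's note: illustrative sketch, not part of cmdlib.py.  As the docstring
# above says, LUBackupQuery.Exec returns a node -> export-list mapping, using
# False for nodes whose export_list RPC failed.  A caller can separate the two
# cases as below (the helper name is invented).
def _SplitExportResult(result):
  """Returns (reachable, failed_nodes) from a LUBackupQuery-style result."""
  reachable = dict((node, exports) for (node, exports) in result.items()
                   if exports is not False)
  failed_nodes = [node for (node, exports) in result.items()
                  if exports is False]
  return reachable, failed_nodes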
10412

    
10413

    
10414
class LUBackupPrepare(NoHooksLU):
10415
  """Prepares an instance for an export and returns useful information.
10416

10417
  """
10418
  REQ_BGL = False
10419

    
10420
  def ExpandNames(self):
10421
    self._ExpandAndLockInstance()
10422

    
10423
  def CheckPrereq(self):
10424
    """Check prerequisites.
10425

10426
    """
10427
    instance_name = self.op.instance_name
10428

    
10429
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10430
    assert self.instance is not None, \
10431
          "Cannot retrieve locked instance %s" % self.op.instance_name
10432
    _CheckNodeOnline(self, self.instance.primary_node)
10433

    
10434
    self._cds = _GetClusterDomainSecret()
10435

    
10436
  def Exec(self, feedback_fn):
10437
    """Prepares an instance for an export.
10438

10439
    """
10440
    instance = self.instance
10441

    
10442
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10443
      salt = utils.GenerateSecret(8)
10444

    
10445
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10446
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10447
                                              constants.RIE_CERT_VALIDITY)
10448
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10449

    
10450
      (name, cert_pem) = result.payload
10451

    
10452
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10453
                                             cert_pem)
10454

    
10455
      return {
10456
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10457
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10458
                          salt),
10459
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10460
        }
10461

    
10462
    return None
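# Editor's note: illustrative sketch, not part of cmdlib.py.  The remote export
# preparation above returns "x509_key_name" as a (name, hmac, salt) triple
# derived from the cluster domain secret; LUBackupExport.CheckPrereq later
# refuses the key name unless the HMAC verifies.  The helpers below show the
# general salted-HMAC idea with the standard hmac/hashlib modules; they are not
# claimed to be byte-compatible with utils.Sha1Hmac/VerifySha1Hmac, and the
# names are invented.
import hashlib
import hmac


def _ExampleSignName(secret, name, salt):
  """Returns a hex HMAC-SHA1 digest binding name to the secret and salt."""
  return hmac.new(secret, "%s %s" % (salt, name), hashlib.sha1).hexdigest()


def _ExampleVerifyName(secret, name, digest, salt):
  """Checks a digest produced by _ExampleSignName.

  Real code should prefer a constant-time comparison where available.

  """
  return _ExampleSignName(secret, name, salt) == digest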
10463

    
10464

    
10465
class LUBackupExport(LogicalUnit):
10466
  """Export an instance to an image in the cluster.
10467

10468
  """
10469
  HPATH = "instance-export"
10470
  HTYPE = constants.HTYPE_INSTANCE
10471
  REQ_BGL = False
10472

    
10473
  def CheckArguments(self):
10474
    """Check the arguments.
10475

10476
    """
10477
    self.x509_key_name = self.op.x509_key_name
10478
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10479

    
10480
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10481
      if not self.x509_key_name:
10482
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10483
                                   errors.ECODE_INVAL)
10484

    
10485
      if not self.dest_x509_ca_pem:
10486
        raise errors.OpPrereqError("Missing destination X509 CA",
10487
                                   errors.ECODE_INVAL)
10488

    
10489
  def ExpandNames(self):
10490
    self._ExpandAndLockInstance()
10491

    
10492
    # Lock all nodes for local exports
10493
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10494
      # FIXME: lock only instance primary and destination node
10495
      #
10496
      # Sad but true, for now we have to lock all nodes, as we don't know where
10497
      # the previous export might be, and in this LU we search for it and
10498
      # remove it from its current node. In the future we could fix this by:
10499
      #  - making a tasklet to search (share-lock all), then create the
10500
      #    new one, then one to remove, after
10501
      #  - removing the removal operation altogether
10502
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10503

    
10504
  def DeclareLocks(self, level):
10505
    """Last minute lock declaration."""
10506
    # All nodes are locked anyway, so nothing to do here.
10507

    
10508
  def BuildHooksEnv(self):
10509
    """Build hooks env.
10510

10511
    This will run on the master, primary node and target node.
10512

10513
    """
10514
    env = {
10515
      "EXPORT_MODE": self.op.mode,
10516
      "EXPORT_NODE": self.op.target_node,
10517
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10518
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10519
      # TODO: Generic function for boolean env variables
10520
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10521
      }
10522

    
10523
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10524

    
10525
    return env
10526

    
10527
  def BuildHooksNodes(self):
10528
    """Build hooks nodes.
10529

10530
    """
10531
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10532

    
10533
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10534
      nl.append(self.op.target_node)
10535

    
10536
    return (nl, nl)
10537

    
10538
  def CheckPrereq(self):
10539
    """Check prerequisites.
10540

10541
    This checks that the instance and node names are valid.
10542

10543
    """
10544
    instance_name = self.op.instance_name
10545

    
10546
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10547
    assert self.instance is not None, \
10548
          "Cannot retrieve locked instance %s" % self.op.instance_name
10549
    _CheckNodeOnline(self, self.instance.primary_node)
10550

    
10551
    if (self.op.remove_instance and self.instance.admin_up and
10552
        not self.op.shutdown):
10553
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10554
                                 " down before")
10555

    
10556
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10557
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10558
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10559
      assert self.dst_node is not None
10560

    
10561
      _CheckNodeOnline(self, self.dst_node.name)
10562
      _CheckNodeNotDrained(self, self.dst_node.name)
10563

    
10564
      self._cds = None
10565
      self.dest_disk_info = None
10566
      self.dest_x509_ca = None
10567

    
10568
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10569
      self.dst_node = None
10570

    
10571
      if len(self.op.target_node) != len(self.instance.disks):
10572
        raise errors.OpPrereqError(("Received destination information for %s"
10573
                                    " disks, but instance %s has %s disks") %
10574
                                   (len(self.op.target_node), instance_name,
10575
                                    len(self.instance.disks)),
10576
                                   errors.ECODE_INVAL)
10577

    
10578
      cds = _GetClusterDomainSecret()
10579

    
10580
      # Check X509 key name
10581
      try:
10582
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10583
      except (TypeError, ValueError), err:
10584
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10585

    
10586
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10587
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10588
                                   errors.ECODE_INVAL)
10589

    
10590
      # Load and verify CA
10591
      try:
10592
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10593
      except OpenSSL.crypto.Error, err:
10594
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10595
                                   (err, ), errors.ECODE_INVAL)
10596

    
10597
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10598
      if errcode is not None:
10599
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10600
                                   (msg, ), errors.ECODE_INVAL)
10601

    
10602
      self.dest_x509_ca = cert
10603

    
10604
      # Verify target information
10605
      disk_info = []
10606
      for idx, disk_data in enumerate(self.op.target_node):
10607
        try:
10608
          (host, port, magic) = \
10609
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10610
        except errors.GenericError, err:
10611
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10612
                                     (idx, err), errors.ECODE_INVAL)
10613

    
10614
        disk_info.append((host, port, magic))
10615

    
10616
      assert len(disk_info) == len(self.op.target_node)
10617
      self.dest_disk_info = disk_info
10618

    
10619
    else:
10620
      raise errors.ProgrammerError("Unhandled export mode %r" %
10621
                                   self.op.mode)
10622

    
10623
    # instance disk type verification
10624
    # TODO: Implement export support for file-based disks
10625
    for disk in self.instance.disks:
10626
      if disk.dev_type == constants.LD_FILE:
10627
        raise errors.OpPrereqError("Export not supported for instances with"
10628
                                   " file-based disks", errors.ECODE_INVAL)
10629

    
10630
  def _CleanupExports(self, feedback_fn):
10631
    """Removes exports of current instance from all other nodes.
10632

10633
    If an instance in a cluster with nodes A..D was exported to node C, its
10634
    exports will be removed from the nodes A, B and D.
10635

10636
    """
10637
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10638

    
10639
    nodelist = self.cfg.GetNodeList()
10640
    nodelist.remove(self.dst_node.name)
10641

    
10642
    # on one-node clusters nodelist will be empty after the removal
10643
    # if we proceed the backup would be removed because OpBackupQuery
10644
    # substitutes an empty list with the full cluster node list.
10645
    iname = self.instance.name
10646
    if nodelist:
10647
      feedback_fn("Removing old exports for instance %s" % iname)
10648
      exportlist = self.rpc.call_export_list(nodelist)
10649
      for node in exportlist:
10650
        if exportlist[node].fail_msg:
10651
          continue
10652
        if iname in exportlist[node].payload:
10653
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10654
          if msg:
10655
            self.LogWarning("Could not remove older export for instance %s"
10656
                            " on node %s: %s", iname, node, msg)
10657

    
10658
  def Exec(self, feedback_fn):
10659
    """Export an instance to an image in the cluster.
10660

10661
    """
10662
    assert self.op.mode in constants.EXPORT_MODES
10663

    
10664
    instance = self.instance
10665
    src_node = instance.primary_node
10666

    
10667
    if self.op.shutdown:
10668
      # shutdown the instance, but not the disks
10669
      feedback_fn("Shutting down instance %s" % instance.name)
10670
      result = self.rpc.call_instance_shutdown(src_node, instance,
10671
                                               self.op.shutdown_timeout)
10672
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10673
      result.Raise("Could not shutdown instance %s on"
10674
                   " node %s" % (instance.name, src_node))
10675

    
10676
    # set the disks ID correctly since call_instance_start needs the
10677
    # correct drbd minor to create the symlinks
10678
    for disk in instance.disks:
10679
      self.cfg.SetDiskID(disk, src_node)
10680

    
10681
    activate_disks = (not instance.admin_up)
10682

    
10683
    if activate_disks:
10684
      # Activate the instance disks if we're exporting a stopped instance
10685
      feedback_fn("Activating disks for %s" % instance.name)
10686
      _StartInstanceDisks(self, instance, None)
10687

    
10688
    try:
10689
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10690
                                                     instance)
10691

    
10692
      helper.CreateSnapshots()
10693
      try:
10694
        if (self.op.shutdown and instance.admin_up and
10695
            not self.op.remove_instance):
10696
          assert not activate_disks
10697
          feedback_fn("Starting instance %s" % instance.name)
10698
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10699
          msg = result.fail_msg
10700
          if msg:
10701
            feedback_fn("Failed to start instance: %s" % msg)
10702
            _ShutdownInstanceDisks(self, instance)
10703
            raise errors.OpExecError("Could not start instance: %s" % msg)
10704

    
10705
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10706
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10707
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10708
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10709
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10710

    
10711
          (key_name, _, _) = self.x509_key_name
10712

    
10713
          dest_ca_pem = \
10714
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10715
                                            self.dest_x509_ca)
10716

    
10717
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10718
                                                     key_name, dest_ca_pem,
10719
                                                     timeouts)
10720
      finally:
10721
        helper.Cleanup()
10722

    
10723
      # Check for backwards compatibility
10724
      assert len(dresults) == len(instance.disks)
10725
      assert compat.all(isinstance(i, bool) for i in dresults), \
10726
             "Not all results are boolean: %r" % dresults
10727

    
10728
    finally:
10729
      if activate_disks:
10730
        feedback_fn("Deactivating disks for %s" % instance.name)
10731
        _ShutdownInstanceDisks(self, instance)
10732

    
10733
    if not (compat.all(dresults) and fin_resu):
10734
      failures = []
10735
      if not fin_resu:
10736
        failures.append("export finalization")
10737
      if not compat.all(dresults):
10738
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10739
                               if not dsk)
10740
        failures.append("disk export: disk(s) %s" % fdsk)
10741

    
10742
      raise errors.OpExecError("Export failed, errors in %s" %
10743
                               utils.CommaJoin(failures))
10744

    
10745
    # At this point, the export was successful, we can cleanup/finish
10746

    
10747
    # Remove instance if requested
10748
    if self.op.remove_instance:
10749
      feedback_fn("Removing instance %s" % instance.name)
10750
      _RemoveInstance(self, feedback_fn, instance,
10751
                      self.op.ignore_remove_failures)
10752

    
10753
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10754
      self._CleanupExports(feedback_fn)
10755

    
10756
    return fin_resu, dresults
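# Editor's note: illustrative sketch, not part of cmdlib.py.  LUBackupExport
# above returns (fin_resu, dresults): an overall finalization flag plus one
# boolean per instance disk.  The invented helper below reports the failed
# disk indices in the same spirit as the error handling near the end of Exec.
def _SummarizeExport(fin_resu, dresults):
  """Returns (success, failed_disk_indices) for an export result."""
  failed = [idx for (idx, ok) in enumerate(dresults) if not ok]
  return (bool(fin_resu) and not failed, failed)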
10757

    
10758

    
10759
class LUBackupRemove(NoHooksLU):
10760
  """Remove exports related to the named instance.
10761

10762
  """
10763
  REQ_BGL = False
10764

    
10765
  def ExpandNames(self):
10766
    self.needed_locks = {}
10767
    # We need all nodes to be locked in order for RemoveExport to work, but we
10768
    # don't need to lock the instance itself, as nothing will happen to it (and
10769
    # we can also remove exports for an already-removed instance)
10770
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10771

    
10772
  def Exec(self, feedback_fn):
10773
    """Remove any export.
10774

10775
    """
10776
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10777
    # If the instance was not found we'll try with the name that was passed in.
10778
    # This will only work if it was an FQDN, though.
10779
    fqdn_warn = False
10780
    if not instance_name:
10781
      fqdn_warn = True
10782
      instance_name = self.op.instance_name
10783

    
10784
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
10785
    exportlist = self.rpc.call_export_list(locked_nodes)
10786
    found = False
10787
    for node in exportlist:
10788
      msg = exportlist[node].fail_msg
10789
      if msg:
10790
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10791
        continue
10792
      if instance_name in exportlist[node].payload:
10793
        found = True
10794
        result = self.rpc.call_export_remove(node, instance_name)
10795
        msg = result.fail_msg
10796
        if msg:
10797
          logging.error("Could not remove export for instance %s"
10798
                        " on node %s: %s", instance_name, node, msg)
10799

    
10800
    if fqdn_warn and not found:
10801
      feedback_fn("Export not found. If trying to remove an export belonging"
10802
                  " to a deleted instance please use its Fully Qualified"
10803
                  " Domain Name.")
10804

    
10805

    
10806
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not already an existing node
    group.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information.
    self.node_data = self.cfg.GetAllNodesInfo()
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
    affected_groups.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: list(affected_groups),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


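# Illustrative sketch (hypothetical names, not executed anywhere): given a
# DRBD instance "inst1" spanning node1/node2, both currently in group
# "uuid-g1", moving only node2 to "uuid-g2" makes the instance newly split:
#
#   (new, old) = LUGroupAssignNodes.CheckAssignmentForSplitInstances(
#     [("node2", "uuid-g2")], node_data, instance_data)
#   # new == ["inst1"], old == []
#
# where node_data and instance_data are the dicts returned by
# cfg.GetAllNodesInfo() and cfg.GetAllInstancesInfo().

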
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


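# Illustrative sketch of the mappings built in _GroupQuery._GetQueryData
# (UUIDs and names below are hypothetical):
#
#   group_to_nodes = {
#     "uuid-g1": ["node1", "node2"],
#     }
#   group_to_instances = {
#     "uuid-g1": ["inst1"],  # instances whose *primary* node is in the group
#     }
#
# Each dict is only built when the corresponding data (GQ_NODE/GQ_INST) was
# requested, and both are handed to query.GroupQueryData together with the
# group objects themselves.

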
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures the requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the list of matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


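# Illustrative sketch for LUTagsSearch.Exec (tag values are hypothetical):
# with the pattern "^ha:" and an instance "inst1" carrying the tag
# "ha:primary", the result would contain the pair
#
#   ("/instances/inst1", "ha:primary")
#
# i.e. each match is reported as (object path, tag), with paths of the form
# /cluster, /nodes/<name>, /instances/<name> and /nodegroup/<name>.

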
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


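# Minimal client-side sketch of the notification handshake used by
# LUTestJqueue above (an assumption about how a test driver could react to
# the ELOG_JQUEUE_TEST log entries; the real driver lives outside this
# module): the LU publishes the socket path, waits for a connection, and
# then waits for a confirmation byte or for the connection to be closed.
#
#   import socket
#   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   sock.connect(published_socket_path)  # path taken from the log message
#   sock.send("x")                       # confirm (closing also suffices)
#   sock.close()

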
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      request_groups = fn(self.relocate_from)
      result_groups = fn(rdict["result"])

      if result_groups != request_groups:
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


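# Illustrative sketch (values are hypothetical): a well-formed iallocator
# reply, as accepted by IAllocator._ValidateResult above, is a serialized
# object of the form
#
#   {
#     "success": true,
#     "info": "allocation successful",
#     "result": ["node1.example.com", "node2.example.com"],
#   }
#
# Older scripts may return the node list under "nodes" instead of "result";
# _ValidateResult still accepts that for backwards compatibility.

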
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
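

# Illustrative usage sketch (hypothetical caller): a generic query opcode can
# resolve the requested resource to its query class via
# _GetQueryImplementation, e.g.
#
#   impl_cls = _GetQueryImplementation(constants.QR_GROUP)  # -> _GroupQuery
#   query_obj = impl_cls(qlang.MakeSimpleFilter("name", names), fields, False)
#
# mirroring how LUGroupQuery.CheckArguments instantiates _GroupQuery above.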