lib/cmdlib.py @ a59faf4b

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60

    
61
import ganeti.masterd.instance # pylint: disable-msg=W0611
62

    
63

    
64
def _SupportsOob(cfg, node):
65
  """Tells if node supports OOB.
66

67
  @type cfg: L{config.ConfigWriter}
68
  @param cfg: The cluster configuration
69
  @type node: L{objects.Node}
70
  @param node: The node
71
  @return: The OOB script if supported or an empty string otherwise
72

73
  """
74
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
75

    
76

    
77
class ResultWithJobs:
78
  """Data container for LU results with jobs.
79

80
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
81
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
82
  contained in the C{jobs} attribute and include the job IDs in the opcode
83
  result.
84

85
  """
86
  def __init__(self, jobs, **kwargs):
87
    """Initializes this class.
88

89
    Additional return values can be specified as keyword arguments.
90

91
    @type jobs: list of lists of L{opcodes.OpCode}
92
    @param jobs: A list of lists of opcode objects
93

94
    """
95
    self.jobs = jobs
96
    self.other = kwargs
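  # Illustrative sketch, not part of the original module: an LU's Exec method
  # can hand follow-up work to the job queue by returning an instance of this
  # class. The opcode and the extra keyword below are only examples.
  #
  #   def Exec(self, feedback_fn):
  #     ...
  #     return ResultWithJobs([[opcodes.OpClusterVerify()]],
  #                           summary="follow-up verification submitted")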
97

    
98

    
99
class LogicalUnit(object):
100
  """Logical Unit base class.
101

102
  Subclasses must follow these rules:
103
    - implement ExpandNames
104
    - implement CheckPrereq (except when tasklets are used)
105
    - implement Exec (except when tasklets are used)
106
    - implement BuildHooksEnv
107
    - implement BuildHooksNodes
108
    - redefine HPATH and HTYPE
109
    - optionally redefine their run requirements:
110
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
111

112
  Note that all commands require root permissions.
113

114
  @ivar dry_run_result: the value (if any) that will be returned to the caller
115
      in dry-run mode (signalled by opcode dry_run parameter)
116

117
  """
118
  HPATH = None
119
  HTYPE = None
120
  REQ_BGL = True
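  # Illustrative sketch, not part of the original file: a minimal hooks-less
  # LU following the rules above could look roughly like this (NoHooksLU is
  # defined further down in this module):
  #
  #   class LUExampleNoop(NoHooksLU):
  #     REQ_BGL = False
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("nothing to do")
  #       return True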
121

    
122
  def __init__(self, processor, op, context, rpc):
123
    """Constructor for LogicalUnit.
124

125
    This needs to be overridden in derived classes in order to check op
126
    validity.
127

128
    """
129
    self.proc = processor
130
    self.op = op
131
    self.cfg = context.cfg
132
    self.glm = context.glm
133
    self.context = context
134
    self.rpc = rpc
135
    # Dicts used to declare locking needs to mcpu
136
    self.needed_locks = None
137
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
138
    self.add_locks = {}
139
    self.remove_locks = {}
140
    # Used to force good behavior when calling helper functions
141
    self.recalculate_locks = {}
142
    # logging
143
    self.Log = processor.Log # pylint: disable-msg=C0103
144
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
145
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
146
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
147
    # support for dry-run
148
    self.dry_run_result = None
149
    # support for generic debug attribute
150
    if (not hasattr(self.op, "debug_level") or
151
        not isinstance(self.op.debug_level, int)):
152
      self.op.debug_level = 0
153

    
154
    # Tasklets
155
    self.tasklets = None
156

    
157
    # Validate opcode parameters and set defaults
158
    self.op.Validate(True)
159

    
160
    self.CheckArguments()
161

    
162
  def CheckArguments(self):
163
    """Check syntactic validity for the opcode arguments.
164

165
    This method is for doing a simple syntactic check and ensuring
166
    validity of opcode parameters, without any cluster-related
167
    checks. While the same can be accomplished in ExpandNames and/or
168
    CheckPrereq, doing these separately is better because:
169

170
      - ExpandNames is left as purely a lock-related function
171
      - CheckPrereq is run after we have acquired locks (and possibly
172
        waited for them)
173

174
    The function is allowed to change the self.op attribute so that
175
    later methods no longer need to worry about missing parameters.
176

177
    """
178
    pass
179

    
180
  def ExpandNames(self):
181
    """Expand names for this LU.
182

183
    This method is called before starting to execute the opcode, and it should
184
    update all the parameters of the opcode to their canonical form (e.g. a
185
    short node name must be fully expanded after this method has successfully
186
    completed). This way locking, hooks, logging, etc. can work correctly.
187

188
    LUs which implement this method must also populate the self.needed_locks
189
    member, as a dict with lock levels as keys, and a list of needed lock names
190
    as values. Rules:
191

192
      - use an empty dict if you don't need any lock
193
      - if you don't need any lock at a particular level omit that level
194
      - don't put anything for the BGL level
195
      - if you want all locks at a level use locking.ALL_SET as a value
196

197
    If you need to share locks (rather than acquire them exclusively) at one
198
    level you can modify self.share_locks, setting a true value (usually 1) for
199
    that level. By default locks are not shared.
200

201
    This function can also define a list of tasklets, which then will be
202
    executed in order instead of the usual LU-level CheckPrereq and Exec
203
    functions, if those are not defined by the LU.
204

205
    Examples::
206

207
      # Acquire all nodes and one instance
208
      self.needed_locks = {
209
        locking.LEVEL_NODE: locking.ALL_SET,
210
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
211
      }
212
      # Acquire just two nodes
213
      self.needed_locks = {
214
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
215
      }
216
      # Acquire no locks
217
      self.needed_locks = {} # No, you can't leave it to the default value None
218

219
    """
220
    # The implementation of this method is mandatory only if the new LU is
221
    # concurrent, so that old LUs don't need to be changed all at the same
222
    # time.
223
    if self.REQ_BGL:
224
      self.needed_locks = {} # Exclusive LUs don't need locks.
225
    else:
226
      raise NotImplementedError
227

    
228
  def DeclareLocks(self, level):
229
    """Declare LU locking needs for a level
230

231
    While most LUs can just declare their locking needs at ExpandNames time,
232
    sometimes there's the need to calculate some locks after having acquired
233
    the ones before. This function is called just before acquiring locks at a
234
    particular level, but after acquiring the ones at lower levels, and permits
235
    such calculations. It can be used to modify self.needed_locks, and by
236
    default it does nothing.
237

238
    This function is only called if you have something already set in
239
    self.needed_locks for the level.
240

241
    @param level: Locking level which is going to be locked
242
    @type level: member of ganeti.locking.LEVELS
243

244
    """
245

    
246
  def CheckPrereq(self):
247
    """Check prerequisites for this LU.
248

249
    This method should check that the prerequisites for the execution
250
    of this LU are fulfilled. It can do internode communication, but
251
    it should be idempotent - no cluster or system changes are
252
    allowed.
253

254
    The method should raise errors.OpPrereqError in case something is
255
    not fulfilled. Its return value is ignored.
256

257
    This method should also update all the parameters of the opcode to
258
    their canonical form if it hasn't been done by ExpandNames before.
259

260
    """
261
    if self.tasklets is not None:
262
      for (idx, tl) in enumerate(self.tasklets):
263
        logging.debug("Checking prerequisites for tasklet %s/%s",
264
                      idx + 1, len(self.tasklets))
265
        tl.CheckPrereq()
266
    else:
267
      pass
268

    
269
  def Exec(self, feedback_fn):
270
    """Execute the LU.
271

272
    This method should implement the actual work. It should raise
273
    errors.OpExecError for failures that are somewhat dealt with in
274
    code, or expected.
275

276
    """
277
    if self.tasklets is not None:
278
      for (idx, tl) in enumerate(self.tasklets):
279
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
280
        tl.Exec(feedback_fn)
281
    else:
282
      raise NotImplementedError
283

    
284
  def BuildHooksEnv(self):
285
    """Build hooks environment for this LU.
286

287
    @rtype: dict
288
    @return: Dictionary containing the environment that will be used for
289
      running the hooks for this LU. The keys of the dict must not be prefixed
290
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
291
      will extend the environment with additional variables. If no environment
292
      should be defined, an empty dictionary should be returned (not C{None}).
293
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
294
      will not be called.
295

296
    """
297
    raise NotImplementedError
298

    
299
  def BuildHooksNodes(self):
300
    """Build list of nodes to run LU's hooks.
301

302
    @rtype: tuple; (list, list)
303
    @return: Tuple containing a list of node names on which the hook
304
      should run before the execution and a list of node names on which the
305
      hook should run after the execution. No nodes should be returned as an
306
      empty list (and not None).
307
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
308
      will not be called.
309

310
    """
311
    raise NotImplementedError
312

    
313
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
314
    """Notify the LU about the results of its hooks.
315

316
    This method is called every time a hooks phase is executed, and notifies
317
    the Logical Unit about the hooks' result. The LU can then use it to alter
318
    its result based on the hooks.  By default the method does nothing and the
319
    previous result is passed back unchanged but any LU can define it if it
320
    wants to use the local cluster hook-scripts somehow.
321

322
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
323
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
324
    @param hook_results: the results of the multi-node hooks rpc call
325
    @param feedback_fn: function used to send feedback back to the caller
326
    @param lu_result: the previous Exec result this LU had, or None
327
        in the PRE phase
328
    @return: the new Exec result, based on the previous result
329
        and hook results
330

331
    """
332
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
334
    # pylint: disable-msg=W0613,R0201
335
    return lu_result
336

    
337
  def _ExpandAndLockInstance(self):
338
    """Helper function to expand and lock an instance.
339

340
    Many LUs that work on an instance take its name in self.op.instance_name
341
    and need to expand it and then declare the expanded name for locking. This
342
    function does it, and then updates self.op.instance_name to the expanded
343
    name. It also initializes needed_locks as a dict, if this hasn't been done
344
    before.
345

346
    """
347
    if self.needed_locks is None:
348
      self.needed_locks = {}
349
    else:
350
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
351
        "_ExpandAndLockInstance called with instance-level locks set"
352
    self.op.instance_name = _ExpandInstanceName(self.cfg,
353
                                                self.op.instance_name)
354
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
355

    
356
  def _LockInstancesNodes(self, primary_only=False):
357
    """Helper function to declare instances' nodes for locking.
358

359
    This function should be called after locking one or more instances to lock
360
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
361
    with all primary or secondary nodes for instances already locked and
362
    present in self.needed_locks[locking.LEVEL_INSTANCE].
363

364
    It should be called from DeclareLocks, and for safety only works if
365
    self.recalculate_locks[locking.LEVEL_NODE] is set.
366

367
    In the future it may grow parameters to just lock some instance's nodes, or
368
    to just lock primaries or secondary nodes, if needed.
369

370
    It should be called in DeclareLocks in a way similar to::
371

372
      if level == locking.LEVEL_NODE:
373
        self._LockInstancesNodes()
374

375
    @type primary_only: boolean
376
    @param primary_only: only lock primary nodes of locked instances
377

378
    """
379
    assert locking.LEVEL_NODE in self.recalculate_locks, \
380
      "_LockInstancesNodes helper function called with no nodes to recalculate"
381

    
382
    # TODO: check if we've really been called with the instance locks held
383

    
384
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
385
    # future we might want to have different behaviors depending on the value
386
    # of self.recalculate_locks[locking.LEVEL_NODE]
387
    wanted_nodes = []
388
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
389
      instance = self.context.cfg.GetInstanceInfo(instance_name)
390
      wanted_nodes.append(instance.primary_node)
391
      if not primary_only:
392
        wanted_nodes.extend(instance.secondary_nodes)
393

    
394
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
395
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
396
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
397
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
398

    
399
    del self.recalculate_locks[locking.LEVEL_NODE]
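  # Illustrative sketch, not part of the original file: a concurrent LU
  # typically wires ExpandNames and DeclareLocks together like this, so node
  # locks are computed only once the instance lock is known:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()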
400

    
401

    
402
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
403
  """Simple LU which runs no hooks.
404

405
  This LU is intended as a parent for other LogicalUnits which will
406
  run no hooks, in order to reduce duplicate code.
407

408
  """
409
  HPATH = None
410
  HTYPE = None
411

    
412
  def BuildHooksEnv(self):
413
    """Empty BuildHooksEnv for NoHooksLu.
414

415
    This just raises an error.
416

417
    """
418
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
419

    
420
  def BuildHooksNodes(self):
421
    """Empty BuildHooksNodes for NoHooksLU.
422

423
    """
424
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
425

    
426

    
427
class Tasklet:
428
  """Tasklet base class.
429

430
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
431
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
432
  tasklets know nothing about locks.
433

434
  Subclasses must follow these rules:
435
    - Implement CheckPrereq
436
    - Implement Exec
437

438
  """
439
  def __init__(self, lu):
440
    self.lu = lu
441

    
442
    # Shortcuts
443
    self.cfg = lu.cfg
444
    self.rpc = lu.rpc
445

    
446
  def CheckPrereq(self):
447
    """Check prerequisites for this tasklets.
448

449
    This method should check whether the prerequisites for the execution of
450
    this tasklet are fulfilled. It can do internode communication, but it
451
    should be idempotent - no cluster or system changes are allowed.
452

453
    The method should raise errors.OpPrereqError in case something is not
454
    fulfilled. Its return value is ignored.
455

456
    This method should also update all parameters to their canonical form if it
457
    hasn't been done before.
458

459
    """
460
    pass
461

    
462
  def Exec(self, feedback_fn):
463
    """Execute the tasklet.
464

465
    This method should implement the actual work. It should raise
466
    errors.OpExecError for failures that are somewhat dealt with in code, or
467
    expected.
468

469
    """
470
    raise NotImplementedError
471

    
472

    
473
class _QueryBase:
474
  """Base for query utility classes.
475

476
  """
477
  #: Attribute holding field definitions
478
  FIELDS = None
479

    
480
  def __init__(self, filter_, fields, use_locking):
481
    """Initializes this class.
482

483
    """
484
    self.use_locking = use_locking
485

    
486
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
487
                             namefield="name")
488
    self.requested_data = self.query.RequestedData()
489
    self.names = self.query.RequestedNames()
490

    
491
    # Sort only if no names were requested
492
    self.sort_by_name = not self.names
493

    
494
    self.do_locking = None
495
    self.wanted = None
496

    
497
  def _GetNames(self, lu, all_names, lock_level):
498
    """Helper function to determine names asked for in the query.
499

500
    """
501
    if self.do_locking:
502
      names = lu.glm.list_owned(lock_level)
503
    else:
504
      names = all_names
505

    
506
    if self.wanted == locking.ALL_SET:
507
      assert not self.names
508
      # caller didn't specify names, so ordering is not important
509
      return utils.NiceSort(names)
510

    
511
    # caller specified names and we must keep the same order
512
    assert self.names
513
    assert not self.do_locking or lu.glm.is_owned(lock_level)
514

    
515
    missing = set(self.wanted).difference(names)
516
    if missing:
517
      raise errors.OpExecError("Some items were removed before retrieving"
518
                               " their data: %s" % missing)
519

    
520
    # Return expanded names
521
    return self.wanted
522

    
523
  def ExpandNames(self, lu):
524
    """Expand names for this query.
525

526
    See L{LogicalUnit.ExpandNames}.
527

528
    """
529
    raise NotImplementedError()
530

    
531
  def DeclareLocks(self, lu, level):
532
    """Declare locks for this query.
533

534
    See L{LogicalUnit.DeclareLocks}.
535

536
    """
537
    raise NotImplementedError()
538

    
539
  def _GetQueryData(self, lu):
540
    """Collects all data for this query.
541

542
    @return: Query data object
543

544
    """
545
    raise NotImplementedError()
546

    
547
  def NewStyleQuery(self, lu):
548
    """Collect data and execute query.
549

550
    """
551
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
552
                                  sort_by_name=self.sort_by_name)
553

    
554
  def OldStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return self.query.OldStyleQuery(self._GetQueryData(lu),
559
                                    sort_by_name=self.sort_by_name)
560

    
561

    
562
def _GetWantedNodes(lu, nodes):
563
  """Returns list of checked and expanded node names.
564

565
  @type lu: L{LogicalUnit}
566
  @param lu: the logical unit on whose behalf we execute
567
  @type nodes: list
568
  @param nodes: list of node names or None for all nodes
569
  @rtype: list
570
  @return: the list of nodes, sorted
571
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
572

573
  """
574
  if nodes:
575
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
576

    
577
  return utils.NiceSort(lu.cfg.GetNodeList())
578

    
579

    
580
def _GetWantedInstances(lu, instances):
581
  """Returns list of checked and expanded instance names.
582

583
  @type lu: L{LogicalUnit}
584
  @param lu: the logical unit on whose behalf we execute
585
  @type instances: list
586
  @param instances: list of instance names or None for all instances
587
  @rtype: list
588
  @return: the list of instances, sorted
589
  @raise errors.OpPrereqError: if the instances parameter is wrong type
590
  @raise errors.OpPrereqError: if any of the passed instances is not found
591

592
  """
593
  if instances:
594
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
595
  else:
596
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
597
  return wanted
598

    
599

    
600
def _GetUpdatedParams(old_params, update_dict,
601
                      use_default=True, use_none=False):
602
  """Return the new version of a parameter dictionary.
603

604
  @type old_params: dict
605
  @param old_params: old parameters
606
  @type update_dict: dict
607
  @param update_dict: dict containing new parameter values, or
608
      constants.VALUE_DEFAULT to reset the parameter to its default
609
      value
610
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
616
  @rtype: dict
617
  @return: the new parameter dictionary
618

619
  """
620
  params_copy = copy.deepcopy(old_params)
621
  for key, val in update_dict.iteritems():
622
    if ((use_default and val == constants.VALUE_DEFAULT) or
623
        (use_none and val is None)):
624
      try:
625
        del params_copy[key]
626
      except KeyError:
627
        pass
628
    else:
629
      params_copy[key] = val
630
  return params_copy
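# Illustrative example, not part of the original module: with the default
# flags, L{constants.VALUE_DEFAULT} removes a key (so the cluster-level
# default applies again) while any other value overrides or extends the old
# dictionary:
#
#   >>> _GetUpdatedParams({"a": 1, "b": 2},
#   ...                   {"b": constants.VALUE_DEFAULT, "c": 3})
#   {'a': 1, 'c': 3}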
631

    
632

    
633
def _ReleaseLocks(lu, level, names=None, keep=None):
634
  """Releases locks owned by an LU.
635

636
  @type lu: L{LogicalUnit}
  @param lu: the logical unit owning the locks
  @param level: Lock level
638
  @type names: list or None
639
  @param names: Names of locks to release
640
  @type keep: list or None
641
  @param keep: Names of locks to retain
642

643
  """
644
  assert not (keep is not None and names is not None), \
645
         "Only one of the 'names' and the 'keep' parameters can be given"
646

    
647
  if names is not None:
648
    should_release = names.__contains__
649
  elif keep:
650
    should_release = lambda name: name not in keep
651
  else:
652
    should_release = None
653

    
654
  if should_release:
655
    retain = []
656
    release = []
657

    
658
    # Determine which locks to release
659
    for name in lu.glm.list_owned(level):
660
      if should_release(name):
661
        release.append(name)
662
      else:
663
        retain.append(name)
664

    
665
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
666

    
667
    # Release just some locks
668
    lu.glm.release(level, names=release)
669

    
670
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
671
  else:
672
    # Release everything
673
    lu.glm.release(level)
674

    
675
    assert not lu.glm.is_owned(level), "No locks should be owned"
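# Illustrative usage, not part of the original module: keep only the locks
# that are still needed and drop the rest, or release a whole level at once.
#
#   _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])
#   _ReleaseLocks(lu, locking.LEVEL_NODE)   # release everything at this level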
676

    
677

    
678
def _RunPostHook(lu, node_name):
679
  """Runs the post-hook for an opcode on a single node.
680

681
  """
682
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
683
  try:
684
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
685
  except:
686
    # pylint: disable-msg=W0702
687
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
688

    
689

    
690
def _CheckOutputFields(static, dynamic, selected):
691
  """Checks whether all selected fields are valid.
692

693
  @type static: L{utils.FieldSet}
694
  @param static: static fields set
695
  @type dynamic: L{utils.FieldSet}
696
  @param dynamic: dynamic fields set
697

698
  """
699
  f = utils.FieldSet()
700
  f.Extend(static)
701
  f.Extend(dynamic)
702

    
703
  delta = f.NonMatching(selected)
704
  if delta:
705
    raise errors.OpPrereqError("Unknown output fields selected: %s"
706
                               % ",".join(delta), errors.ECODE_INVAL)
707

    
708

    
709
def _CheckGlobalHvParams(params):
710
  """Validates that given hypervisor params are not global ones.
711

712
  This will ensure that instances don't get customised versions of
713
  global params.
714

715
  """
716
  used_globals = constants.HVC_GLOBALS.intersection(params)
717
  if used_globals:
718
    msg = ("The following hypervisor parameters are global and cannot"
719
           " be customized at instance level, please modify them at"
720
           " cluster level: %s" % utils.CommaJoin(used_globals))
721
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
722

    
723

    
724
def _CheckNodeOnline(lu, node, msg=None):
725
  """Ensure that a given node is online.
726

727
  @param lu: the LU on behalf of which we make the check
728
  @param node: the node to check
729
  @param msg: if passed, should be a message to replace the default one
730
  @raise errors.OpPrereqError: if the node is offline
731

732
  """
733
  if msg is None:
734
    msg = "Can't use offline node"
735
  if lu.cfg.GetNodeInfo(node).offline:
736
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
737

    
738

    
739
def _CheckNodeNotDrained(lu, node):
740
  """Ensure that a given node is not drained.
741

742
  @param lu: the LU on behalf of which we make the check
743
  @param node: the node to check
744
  @raise errors.OpPrereqError: if the node is drained
745

746
  """
747
  if lu.cfg.GetNodeInfo(node).drained:
748
    raise errors.OpPrereqError("Can't use drained node %s" % node,
749
                               errors.ECODE_STATE)
750

    
751

    
752
def _CheckNodeVmCapable(lu, node):
753
  """Ensure that a given node is vm capable.
754

755
  @param lu: the LU on behalf of which we make the check
756
  @param node: the node to check
757
  @raise errors.OpPrereqError: if the node is not vm capable
758

759
  """
760
  if not lu.cfg.GetNodeInfo(node).vm_capable:
761
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
762
                               errors.ECODE_STATE)
763

    
764

    
765
def _CheckNodeHasOS(lu, node, os_name, force_variant):
766
  """Ensure that a node supports a given OS.
767

768
  @param lu: the LU on behalf of which we make the check
769
  @param node: the node to check
770
  @param os_name: the OS to query about
771
  @param force_variant: whether to ignore variant errors
772
  @raise errors.OpPrereqError: if the node is not supporting the OS
773

774
  """
775
  result = lu.rpc.call_os_get(node, os_name)
776
  result.Raise("OS '%s' not in supported OS list for node %s" %
777
               (os_name, node),
778
               prereq=True, ecode=errors.ECODE_INVAL)
779
  if not force_variant:
780
    _CheckOSVariant(result.payload, os_name)
781

    
782

    
783
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
784
  """Ensure that a node has the given secondary ip.
785

786
  @type lu: L{LogicalUnit}
787
  @param lu: the LU on behalf of which we make the check
788
  @type node: string
789
  @param node: the node to check
790
  @type secondary_ip: string
791
  @param secondary_ip: the ip to check
792
  @type prereq: boolean
793
  @param prereq: whether to throw a prerequisite or an execute error
794
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
795
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
796

797
  """
798
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
799
  result.Raise("Failure checking secondary ip on node %s" % node,
800
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
801
  if not result.payload:
802
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
803
           " please fix and re-run this command" % secondary_ip)
804
    if prereq:
805
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
806
    else:
807
      raise errors.OpExecError(msg)
808

    
809

    
810
def _GetClusterDomainSecret():
811
  """Reads the cluster domain secret.
812

813
  """
814
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
815
                               strict=True)
816

    
817

    
818
def _CheckInstanceDown(lu, instance, reason):
819
  """Ensure that an instance is not running."""
820
  if instance.admin_up:
821
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
822
                               (instance.name, reason), errors.ECODE_STATE)
823

    
824
  pnode = instance.primary_node
825
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
826
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
827
              prereq=True, ecode=errors.ECODE_ENVIRON)
828

    
829
  if instance.name in ins_l.payload:
830
    raise errors.OpPrereqError("Instance %s is running, %s" %
831
                               (instance.name, reason), errors.ECODE_STATE)
832

    
833

    
834
def _ExpandItemName(fn, name, kind):
835
  """Expand an item name.
836

837
  @param fn: the function to use for expansion
838
  @param name: requested item name
839
  @param kind: text description ('Node' or 'Instance')
840
  @return: the resolved (full) name
841
  @raise errors.OpPrereqError: if the item is not found
842

843
  """
844
  full_name = fn(name)
845
  if full_name is None:
846
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
847
                               errors.ECODE_NOENT)
848
  return full_name
849

    
850

    
851
def _ExpandNodeName(cfg, name):
852
  """Wrapper over L{_ExpandItemName} for nodes."""
853
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
854

    
855

    
856
def _ExpandInstanceName(cfg, name):
857
  """Wrapper over L{_ExpandItemName} for instance."""
858
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
859

    
860

    
861
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
862
                          memory, vcpus, nics, disk_template, disks,
863
                          bep, hvp, hypervisor_name):
864
  """Builds instance related env variables for hooks
865

866
  This builds the hook environment from individual variables.
867

868
  @type name: string
869
  @param name: the name of the instance
870
  @type primary_node: string
871
  @param primary_node: the name of the instance's primary node
872
  @type secondary_nodes: list
873
  @param secondary_nodes: list of secondary nodes as strings
874
  @type os_type: string
875
  @param os_type: the name of the instance's OS
876
  @type status: boolean
877
  @param status: the should_run status of the instance
878
  @type memory: string
879
  @param memory: the memory size of the instance
880
  @type vcpus: string
881
  @param vcpus: the count of VCPUs the instance has
882
  @type nics: list
883
  @param nics: list of tuples (ip, mac, mode, link) representing
884
      the NICs the instance has
885
  @type disk_template: string
886
  @param disk_template: the disk template of the instance
887
  @type disks: list
888
  @param disks: the list of (size, mode) pairs
889
  @type bep: dict
890
  @param bep: the backend parameters for the instance
891
  @type hvp: dict
892
  @param hvp: the hypervisor parameters for the instance
893
  @type hypervisor_name: string
894
  @param hypervisor_name: the hypervisor for the instance
895
  @rtype: dict
896
  @return: the hook environment for this instance
897

898
  """
899
  if status:
900
    str_status = "up"
901
  else:
902
    str_status = "down"
903
  env = {
904
    "OP_TARGET": name,
905
    "INSTANCE_NAME": name,
906
    "INSTANCE_PRIMARY": primary_node,
907
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
908
    "INSTANCE_OS_TYPE": os_type,
909
    "INSTANCE_STATUS": str_status,
910
    "INSTANCE_MEMORY": memory,
911
    "INSTANCE_VCPUS": vcpus,
912
    "INSTANCE_DISK_TEMPLATE": disk_template,
913
    "INSTANCE_HYPERVISOR": hypervisor_name,
914
  }
915

    
916
  if nics:
917
    nic_count = len(nics)
918
    for idx, (ip, mac, mode, link) in enumerate(nics):
919
      if ip is None:
920
        ip = ""
921
      env["INSTANCE_NIC%d_IP" % idx] = ip
922
      env["INSTANCE_NIC%d_MAC" % idx] = mac
923
      env["INSTANCE_NIC%d_MODE" % idx] = mode
924
      env["INSTANCE_NIC%d_LINK" % idx] = link
925
      if mode == constants.NIC_MODE_BRIDGED:
926
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
927
  else:
928
    nic_count = 0
929

    
930
  env["INSTANCE_NIC_COUNT"] = nic_count
931

    
932
  if disks:
933
    disk_count = len(disks)
934
    for idx, (size, mode) in enumerate(disks):
935
      env["INSTANCE_DISK%d_SIZE" % idx] = size
936
      env["INSTANCE_DISK%d_MODE" % idx] = mode
937
  else:
938
    disk_count = 0
939

    
940
  env["INSTANCE_DISK_COUNT"] = disk_count
941

    
942
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
943
    for key, value in source.items():
944
      env["INSTANCE_%s_%s" % (kind, key)] = value
945

    
946
  return env
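# Illustrative example, not part of the original module: building the hook
# environment for a hypothetical single-NIC, single-disk instance (all names
# and values below are made up).
#
#   env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
#                               ["node2.example.com"], "debian-image", True,
#                               512, 1,
#                               [("192.0.2.10", "aa:00:00:00:00:01",
#                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
#                               constants.DT_DRBD8, [(10240, "rw")],
#                               {}, {}, constants.HT_XEN_PVM)
#   # env["INSTANCE_NIC0_BRIDGE"] == "xen-br0"; the hooks runner later adds
#   # the GANETI_ prefix to every key.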
947

    
948

    
949
def _NICListToTuple(lu, nics):
950
  """Build a list of nic information tuples.
951

952
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
953
  value in LUInstanceQueryData.
954

955
  @type lu:  L{LogicalUnit}
956
  @param lu: the logical unit on whose behalf we execute
957
  @type nics: list of L{objects.NIC}
958
  @param nics: list of nics to convert to hooks tuples
959

960
  """
961
  hooks_nics = []
962
  cluster = lu.cfg.GetClusterInfo()
963
  for nic in nics:
964
    ip = nic.ip
965
    mac = nic.mac
966
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
967
    mode = filled_params[constants.NIC_MODE]
968
    link = filled_params[constants.NIC_LINK]
969
    hooks_nics.append((ip, mac, mode, link))
970
  return hooks_nics
971

    
972

    
973
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
974
  """Builds instance related env variables for hooks from an object.
975

976
  @type lu: L{LogicalUnit}
977
  @param lu: the logical unit on whose behalf we execute
978
  @type instance: L{objects.Instance}
979
  @param instance: the instance for which we should build the
980
      environment
981
  @type override: dict
982
  @param override: dictionary with key/values that will override
983
      our values
984
  @rtype: dict
985
  @return: the hook environment dictionary
986

987
  """
988
  cluster = lu.cfg.GetClusterInfo()
989
  bep = cluster.FillBE(instance)
990
  hvp = cluster.FillHV(instance)
991
  args = {
992
    'name': instance.name,
993
    'primary_node': instance.primary_node,
994
    'secondary_nodes': instance.secondary_nodes,
995
    'os_type': instance.os,
996
    'status': instance.admin_up,
997
    'memory': bep[constants.BE_MEMORY],
998
    'vcpus': bep[constants.BE_VCPUS],
999
    'nics': _NICListToTuple(lu, instance.nics),
1000
    'disk_template': instance.disk_template,
1001
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1002
    'bep': bep,
1003
    'hvp': hvp,
1004
    'hypervisor_name': instance.hypervisor,
1005
  }
1006
  if override:
1007
    args.update(override)
1008
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1009

    
1010

    
1011
def _AdjustCandidatePool(lu, exceptions):
1012
  """Adjust the candidate pool after node operations.
1013

1014
  """
1015
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1016
  if mod_list:
1017
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1018
               utils.CommaJoin(node.name for node in mod_list))
1019
    for name in mod_list:
1020
      lu.context.ReaddNode(name)
1021
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1022
  if mc_now > mc_max:
1023
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1024
               (mc_now, mc_max))
1025

    
1026

    
1027
def _DecideSelfPromotion(lu, exceptions=None):
1028
  """Decide whether I should promote myself as a master candidate.
1029

1030
  """
1031
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1032
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1033
  # the new node will increase mc_max with one, so:
1034
  mc_should = min(mc_should + 1, cp_size)
1035
  return mc_now < mc_should
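# Worked example, not part of the original module: with candidate_pool_size
# 10, three current master candidates and five that should exist, the new
# node raises mc_should to min(5 + 1, 10) = 6; since 3 < 6 the function
# returns True and the node promotes itself.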
1036

    
1037

    
1038
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1039
  """Check that the brigdes needed by a list of nics exist.
1040

1041
  """
1042
  cluster = lu.cfg.GetClusterInfo()
1043
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1044
  brlist = [params[constants.NIC_LINK] for params in paramslist
1045
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1046
  if brlist:
1047
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1048
    result.Raise("Error checking bridges on destination node '%s'" %
1049
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1050

    
1051

    
1052
def _CheckInstanceBridgesExist(lu, instance, node=None):
1053
  """Check that the brigdes needed by an instance exist.
1054

1055
  """
1056
  if node is None:
1057
    node = instance.primary_node
1058
  _CheckNicsBridgesExist(lu, instance.nics, node)
1059

    
1060

    
1061
def _CheckOSVariant(os_obj, name):
1062
  """Check whether an OS name conforms to the os variants specification.
1063

1064
  @type os_obj: L{objects.OS}
1065
  @param os_obj: OS object to check
1066
  @type name: string
1067
  @param name: OS name passed by the user, to check for validity
1068

1069
  """
1070
  if not os_obj.supported_variants:
1071
    return
1072
  variant = objects.OS.GetVariant(name)
1073
  if not variant:
1074
    raise errors.OpPrereqError("OS name must include a variant",
1075
                               errors.ECODE_INVAL)
1076

    
1077
  if variant not in os_obj.supported_variants:
1078
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1079

    
1080

    
1081
def _GetNodeInstancesInner(cfg, fn):
1082
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1083

    
1084

    
1085
def _GetNodeInstances(cfg, node_name):
1086
  """Returns a list of all primary and secondary instances on a node.
1087

1088
  """
1089

    
1090
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1091

    
1092

    
1093
def _GetNodePrimaryInstances(cfg, node_name):
1094
  """Returns primary instances on a node.
1095

1096
  """
1097
  return _GetNodeInstancesInner(cfg,
1098
                                lambda inst: node_name == inst.primary_node)
1099

    
1100

    
1101
def _GetNodeSecondaryInstances(cfg, node_name):
1102
  """Returns secondary instances on a node.
1103

1104
  """
1105
  return _GetNodeInstancesInner(cfg,
1106
                                lambda inst: node_name in inst.secondary_nodes)
1107

    
1108

    
1109
def _GetStorageTypeArgs(cfg, storage_type):
1110
  """Returns the arguments for a storage type.
1111

1112
  """
1113
  # Special case for file storage
1114
  if storage_type == constants.ST_FILE:
1115
    # storage.FileStorage wants a list of storage directories
1116
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1117

    
1118
  return []
1119

    
1120

    
1121
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1122
  faulty = []
1123

    
1124
  for dev in instance.disks:
1125
    cfg.SetDiskID(dev, node_name)
1126

    
1127
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1128
  result.Raise("Failed to get disk status from node %s" % node_name,
1129
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1130

    
1131
  for idx, bdev_status in enumerate(result.payload):
1132
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1133
      faulty.append(idx)
1134

    
1135
  return faulty
1136

    
1137

    
1138
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1139
  """Check the sanity of iallocator and node arguments and use the
1140
  cluster-wide iallocator if appropriate.
1141

1142
  Check that at most one of (iallocator, node) is specified. If none is
1143
  specified, then the LU's opcode's iallocator slot is filled with the
1144
  cluster-wide default iallocator.
1145

1146
  @type iallocator_slot: string
1147
  @param iallocator_slot: the name of the opcode iallocator slot
1148
  @type node_slot: string
1149
  @param node_slot: the name of the opcode target node slot
1150

1151
  """
1152
  node = getattr(lu.op, node_slot, None)
1153
  iallocator = getattr(lu.op, iallocator_slot, None)
1154

    
1155
  if node is not None and iallocator is not None:
1156
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1157
                               errors.ECODE_INVAL)
1158
  elif node is None and iallocator is None:
1159
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1160
    if default_iallocator:
1161
      setattr(lu.op, iallocator_slot, default_iallocator)
1162
    else:
1163
      raise errors.OpPrereqError("No iallocator or node given and no"
1164
                                 " cluster-wide default iallocator found."
1165
                                 " Please specify either an iallocator or a"
1166
                                 " node, or set a cluster-wide default"
1167
                                 " iallocator.")
1168

    
1169

    
1170
class LUClusterPostInit(LogicalUnit):
1171
  """Logical unit for running hooks after cluster initialization.
1172

1173
  """
1174
  HPATH = "cluster-init"
1175
  HTYPE = constants.HTYPE_CLUSTER
1176

    
1177
  def BuildHooksEnv(self):
1178
    """Build hooks env.
1179

1180
    """
1181
    return {
1182
      "OP_TARGET": self.cfg.GetClusterName(),
1183
      }
1184

    
1185
  def BuildHooksNodes(self):
1186
    """Build hooks nodes.
1187

1188
    """
1189
    return ([], [self.cfg.GetMasterNode()])
1190

    
1191
  def Exec(self, feedback_fn):
1192
    """Nothing to do.
1193

1194
    """
1195
    return True
1196

    
1197

    
1198
class LUClusterDestroy(LogicalUnit):
1199
  """Logical unit for destroying the cluster.
1200

1201
  """
1202
  HPATH = "cluster-destroy"
1203
  HTYPE = constants.HTYPE_CLUSTER
1204

    
1205
  def BuildHooksEnv(self):
1206
    """Build hooks env.
1207

1208
    """
1209
    return {
1210
      "OP_TARGET": self.cfg.GetClusterName(),
1211
      }
1212

    
1213
  def BuildHooksNodes(self):
1214
    """Build hooks nodes.
1215

1216
    """
1217
    return ([], [])
1218

    
1219
  def CheckPrereq(self):
1220
    """Check prerequisites.
1221

1222
    This checks whether the cluster is empty.
1223

1224
    Any errors are signaled by raising errors.OpPrereqError.
1225

1226
    """
1227
    master = self.cfg.GetMasterNode()
1228

    
1229
    nodelist = self.cfg.GetNodeList()
1230
    if len(nodelist) != 1 or nodelist[0] != master:
1231
      raise errors.OpPrereqError("There are still %d node(s) in"
1232
                                 " this cluster." % (len(nodelist) - 1),
1233
                                 errors.ECODE_INVAL)
1234
    instancelist = self.cfg.GetInstanceList()
1235
    if instancelist:
1236
      raise errors.OpPrereqError("There are still %d instance(s) in"
1237
                                 " this cluster." % len(instancelist),
1238
                                 errors.ECODE_INVAL)
1239

    
1240
  def Exec(self, feedback_fn):
1241
    """Destroys the cluster.
1242

1243
    """
1244
    master = self.cfg.GetMasterNode()
1245

    
1246
    # Run post hooks on master node before it's removed
1247
    _RunPostHook(self, master)
1248

    
1249
    result = self.rpc.call_node_stop_master(master, False)
1250
    result.Raise("Could not disable the master role")
1251

    
1252
    return master
1253

    
1254

    
1255
def _VerifyCertificate(filename):
1256
  """Verifies a certificate for LUClusterVerify.
1257

1258
  @type filename: string
1259
  @param filename: Path to PEM file
1260

1261
  """
1262
  try:
1263
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1264
                                           utils.ReadFile(filename))
1265
  except Exception, err: # pylint: disable-msg=W0703
1266
    return (LUClusterVerify.ETYPE_ERROR,
1267
            "Failed to load X509 certificate %s: %s" % (filename, err))
1268

    
1269
  (errcode, msg) = \
1270
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1271
                                constants.SSL_CERT_EXPIRATION_ERROR)
1272

    
1273
  if msg:
1274
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1275
  else:
1276
    fnamemsg = None
1277

    
1278
  if errcode is None:
1279
    return (None, fnamemsg)
1280
  elif errcode == utils.CERT_WARNING:
1281
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1282
  elif errcode == utils.CERT_ERROR:
1283
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1284

    
1285
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1286

    
1287

    
1288
class LUClusterVerify(LogicalUnit):
1289
  """Verifies the cluster status.
1290

1291
  """
1292
  HPATH = "cluster-verify"
1293
  HTYPE = constants.HTYPE_CLUSTER
1294
  REQ_BGL = False
1295

    
1296
  TCLUSTER = "cluster"
1297
  TNODE = "node"
1298
  TINSTANCE = "instance"
1299

    
1300
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1301
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1302
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1303
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1304
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1305
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1306
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1307
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1308
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1309
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1310
  ENODEDRBD = (TNODE, "ENODEDRBD")
1311
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1312
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1313
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1314
  ENODEHV = (TNODE, "ENODEHV")
1315
  ENODELVM = (TNODE, "ENODELVM")
1316
  ENODEN1 = (TNODE, "ENODEN1")
1317
  ENODENET = (TNODE, "ENODENET")
1318
  ENODEOS = (TNODE, "ENODEOS")
1319
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1320
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1321
  ENODERPC = (TNODE, "ENODERPC")
1322
  ENODESSH = (TNODE, "ENODESSH")
1323
  ENODEVERSION = (TNODE, "ENODEVERSION")
1324
  ENODESETUP = (TNODE, "ENODESETUP")
1325
  ENODETIME = (TNODE, "ENODETIME")
1326
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1327

    
1328
  ETYPE_FIELD = "code"
1329
  ETYPE_ERROR = "ERROR"
1330
  ETYPE_WARNING = "WARNING"
1331

    
1332
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1333

    
1334
  class NodeImage(object):
1335
    """A class representing the logical and physical status of a node.
1336

1337
    @type name: string
1338
    @ivar name: the node name to which this object refers
1339
    @ivar volumes: a structure as returned from
1340
        L{ganeti.backend.GetVolumeList} (runtime)
1341
    @ivar instances: a list of running instances (runtime)
1342
    @ivar pinst: list of configured primary instances (config)
1343
    @ivar sinst: list of configured secondary instances (config)
1344
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1345
        instances for which this node is secondary (config)
1346
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1347
    @ivar dfree: free disk, as reported by the node (runtime)
1348
    @ivar offline: the offline status (config)
1349
    @type rpc_fail: boolean
1350
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1351
        not whether the individual keys were correct) (runtime)
1352
    @type lvm_fail: boolean
1353
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1354
    @type hyp_fail: boolean
1355
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1356
    @type ghost: boolean
1357
    @ivar ghost: whether this is a known node or not (config)
1358
    @type os_fail: boolean
1359
    @ivar os_fail: whether the RPC call didn't return valid OS data
1360
    @type oslist: list
1361
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1362
    @type vm_capable: boolean
1363
    @ivar vm_capable: whether the node can host instances
1364

1365
    """
1366
    def __init__(self, offline=False, name=None, vm_capable=True):
1367
      self.name = name
1368
      self.volumes = {}
1369
      self.instances = []
1370
      self.pinst = []
1371
      self.sinst = []
1372
      self.sbp = {}
1373
      self.mfree = 0
1374
      self.dfree = 0
1375
      self.offline = offline
1376
      self.vm_capable = vm_capable
1377
      self.rpc_fail = False
1378
      self.lvm_fail = False
1379
      self.hyp_fail = False
1380
      self.ghost = False
1381
      self.os_fail = False
1382
      self.oslist = {}
1383

    
1384
  def ExpandNames(self):
1385
    self.needed_locks = {
1386
      locking.LEVEL_NODE: locking.ALL_SET,
1387
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1388
    }
1389
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1390

    
1391
  def _Error(self, ecode, item, msg, *args, **kwargs):
1392
    """Format an error message.
1393

1394
    Based on the opcode's error_codes parameter, either format a
1395
    parseable error code, or a simpler error string.
1396

1397
    This must be called only from Exec and functions called from Exec.
1398

1399
    """
1400
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1401
    itype, etxt = ecode
1402
    # first complete the msg
1403
    if args:
1404
      msg = msg % args
1405
    # then format the whole message
1406
    if self.op.error_codes:
1407
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1408
    else:
1409
      if item:
1410
        item = " " + item
1411
      else:
1412
        item = ""
1413
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1414
    # and finally report it via the feedback_fn
1415
    self._feedback_fn("  - %s" % msg)
1416

    
1417
  def _ErrorIf(self, cond, *args, **kwargs):
1418
    """Log an error message if the passed condition is True.
1419

1420
    """
1421
    cond = bool(cond) or self.op.debug_simulate_errors
1422
    if cond:
1423
      self._Error(*args, **kwargs)
1424
    # do not mark the operation as failed for WARN cases only
1425
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1426
      self.bad = self.bad or cond
1427

    
1428
  def _VerifyNode(self, ninfo, nresult):
1429
    """Perform some basic validation on data returned from a node.
1430

1431
      - check the result data structure is well formed and has all the
1432
        mandatory fields
1433
      - check ganeti version
1434

1435
    @type ninfo: L{objects.Node}
1436
    @param ninfo: the node to check
1437
    @param nresult: the results from the node
1438
    @rtype: boolean
1439
    @return: whether overall this call was successful (and we can expect
1440
         reasonable values in the response)
1441

1442
    """
1443
    node = ninfo.name
1444
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1445

    
1446
    # main result, nresult should be a non-empty dict
1447
    test = not nresult or not isinstance(nresult, dict)
1448
    _ErrorIf(test, self.ENODERPC, node,
1449
                  "unable to verify node: no data returned")
1450
    if test:
1451
      return False
1452

    
1453
    # compares ganeti version
1454
    local_version = constants.PROTOCOL_VERSION
1455
    remote_version = nresult.get("version", None)
1456
    test = not (remote_version and
1457
                isinstance(remote_version, (list, tuple)) and
1458
                len(remote_version) == 2)
1459
    _ErrorIf(test, self.ENODERPC, node,
1460
             "connection to node returned invalid data")
1461
    if test:
1462
      return False
1463

    
1464
    test = local_version != remote_version[0]
1465
    _ErrorIf(test, self.ENODEVERSION, node,
1466
             "incompatible protocol versions: master %s,"
1467
             " node %s", local_version, remote_version[0])
1468
    if test:
1469
      return False
1470

    
1471
    # node seems compatible, we can actually try to look into its results
1472

    
1473
    # full package version
1474
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1475
                  self.ENODEVERSION, node,
1476
                  "software version mismatch: master %s, node %s",
1477
                  constants.RELEASE_VERSION, remote_version[1],
1478
                  code=self.ETYPE_WARNING)
1479

    
1480
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1481
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1482
      for hv_name, hv_result in hyp_result.iteritems():
1483
        test = hv_result is not None
1484
        _ErrorIf(test, self.ENODEHV, node,
1485
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1486

    
1487
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1488
    if ninfo.vm_capable and isinstance(hvp_result, list):
1489
      for item, hv_name, hv_result in hvp_result:
1490
        _ErrorIf(True, self.ENODEHV, node,
1491
                 "hypervisor %s parameter verify failure (source %s): %s",
1492
                 hv_name, item, hv_result)
1493

    
1494
    test = nresult.get(constants.NV_NODESETUP,
1495
                       ["Missing NODESETUP results"])
1496
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1497
             "; ".join(test))
1498

    
1499
    return True
1500

    
1501
  def _VerifyNodeTime(self, ninfo, nresult,
1502
                      nvinfo_starttime, nvinfo_endtime):
1503
    """Check the node time.
1504

1505
    @type ninfo: L{objects.Node}
1506
    @param ninfo: the node to check
1507
    @param nresult: the remote results for the node
1508
    @param nvinfo_starttime: the start time of the RPC call
1509
    @param nvinfo_endtime: the end time of the RPC call
1510

1511
    """
1512
    node = ninfo.name
1513
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1514

    
1515
    ntime = nresult.get(constants.NV_TIME, None)
1516
    try:
1517
      ntime_merged = utils.MergeTime(ntime)
1518
    except (ValueError, TypeError):
1519
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1520
      return
1521

    
1522
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1523
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1524
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1525
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1526
    else:
1527
      ntime_diff = None
1528

    
1529
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1530
             "Node time diverges by at least %s from master node time",
1531
             ntime_diff)
1532

    
1533
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
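      # n_img.sbp maps each primary node to the instances that use this node
      # as secondary; sum the memory of the auto-balanced ones among them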
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])

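    # fileinfo maps each filename to a dict of checksum -> set of node names
    # reporting that checksum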
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no nodes (not"
                " found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

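    # Consistency check: every instance must have one status entry per disk
    # on each of its nodes, and each entry must be a (success, payload) pair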
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    cfg = self.cfg

    env = {
      "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in cfg.GetAllNodesInfo().values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.cfg.GetNodeList())

  def Exec(self, feedback_fn):
    """Verify integrity of the cluster, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    # Compute the set of hypervisor parameters
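    # Each entry is a (source, hypervisor name, parameters) tuple; sources are
    # the cluster defaults, the per-OS overrides and the per-instance settings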
    hvp_data = []
    for hv_name in hypervisors:
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
    for os_name, os_hvp in cluster.os_hvp.items():
      for hv_name, hv_params in os_hvp.items():
        if not hv_params:
          continue
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
    # TODO: collapse identical parameter values in a single one
    for instance in instanceinfo.values():
      if not instance.hvparams:
        continue
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
    # and verify them locally
    self._VerifyHVP(hvp_data)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2284
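    # Each NV_* key below selects a check to be run by the node_verify RPC;
    # the values are the per-check arguments sent to the nodes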
    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS: hvp_data,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    # Gather OOB paths
    oob_paths = []
    for node in nodeinfo:
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying configuration file consistency")
    self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
2427
    for instance in instancelist:
2428
      if verbose:
2429
        feedback_fn("* Verifying instance %s" % instance)
2430
      inst_config = instanceinfo[instance]
2431
      self._VerifyInstance(instance, inst_config, node_image,
2432
                           instdisk[instance])
2433
      inst_nodes_offline = []
2434

    
2435
      pnode = inst_config.primary_node
2436
      pnode_img = node_image[pnode]
2437
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2438
               self.ENODERPC, pnode, "instance %s, connection to"
2439
               " primary node failed", instance)
2440

    
2441
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2442
               self.EINSTANCEBADNODE, instance,
2443
               "instance is marked as running and lives on offline node %s",
2444
               inst_config.primary_node)
2445

    
2446
      # If the instance is non-redundant we cannot survive losing its primary
2447
      # node, so we are not N+1 compliant. On the other hand we have no disk
2448
      # templates with more than one secondary so that situation is not well
2449
      # supported either.
2450
      # FIXME: does not support file-backed instances
2451
      if not inst_config.secondary_nodes:
2452
        i_non_redundant.append(instance)
2453

    
2454
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2455
               instance, "instance has multiple secondary nodes: %s",
2456
               utils.CommaJoin(inst_config.secondary_nodes),
2457
               code=self.ETYPE_WARNING)
2458

    
2459
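      # For internally mirrored disk templates (e.g. DRBD) warn when the
      # primary and secondary nodes belong to different node groups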
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(nodeinfo_byname[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
2503
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2504
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2505

    
2506
    feedback_fn("* Verifying orphan instances")
2507
    self._VerifyOrphanInstances(instancelist, node_image)
2508

    
2509
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2510
      feedback_fn("* Verifying N+1 Memory redundancy")
2511
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2512

    
2513
    feedback_fn("* Other Notes")
2514
    if i_non_redundant:
2515
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2516
                  % len(i_non_redundant))
2517

    
2518
    if i_non_a_balanced:
2519
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2520
                  % len(i_non_a_balanced))
2521

    
2522
    if n_offline:
2523
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2524

    
2525
    if n_drained:
2526
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2527

    
2528
    return not self.bad
2529

    
2530
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    instances = self.cfg.GetAllInstancesInfo().values()

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if not inst.admin_up:
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

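    # query all vm-capable nodes for their logical volumes and record
    # instances whose LVs are reported as offline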
    node_lvs = self.rpc.call_lv_list(nodes, [])
    for node, node_res in node_lvs.items():
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
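        # the node reports the size in bytes; convert to MiB, which is the
        # unit used for disk sizes in the configuration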
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

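    # update the configuration and the known_hosts file; the master role is
    # restarted in the finally clause even if one of these steps fails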
    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.glm.list_owned(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


3183
  """Helper for uploading a file and showing warnings.
3184

3185
  """
3186
  if os.path.exists(fname):
3187
    result = lu.rpc.call_upload_file(nodes, fname)
3188
    for to_node, to_result in result.items():
3189
      msg = to_result.fail_msg
3190
      if msg:
3191
        msg = ("Copy of file %s to node %s failed: %s" %
3192
               (fname, to_node, msg))
3193
        lu.proc.LogWarning(msg)
3194

    
3195

    
3196
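# _UploadHelper silently skips files that do not exist locally and only warns
# about per-node copy failures, so callers such as _RedistributeAncillaryFiles
# below can push a whole batch of files without aborting on the first
# unreachable node.
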
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


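# The four sets returned by _ComputeAncillaryFiles are disjoint (enforced by
# the assertion above) and map to different distribution targets: files_all is
# distributed to all nodes, files_all_opt may exist on all nodes or on none,
# files_mc is restricted to master candidates and files_vm to VM-capable
# nodes; _RedistributeAncillaryFiles below consumes this tuple.
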
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


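# _WaitForSync sleeps at most 60 seconds between polls (time.sleep(min(60,
# max_time))), aborts after 10 consecutive RPC failures against the primary
# node, and when the disks look done but degraded it re-polls once per second
# for roughly ten more seconds to make sure the degraded state is not just
# transient.
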
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for node_name in self.op.node_names:
      node = self.cfg.GetNodeInfo(node_name)

      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))

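# Payload shapes accepted by LUOobCommand._CheckPayload above (illustrative):
#   "health":       a list of (item, status) pairs, each status one of
#                   constants.OOB_STATUSES
#   "power-status": a dict, e.g. {constants.OOB_POWER_STATUS_POWERED: True}
#   "power-on"/"power-off"/"power-cycle": no payload at all
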
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


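# _NodeQuery only acquires per-node locks when live data (query.NQ_LIVE) is
# requested together with use_locking; queries for purely static fields are
# answered from the configuration without locking the nodes.
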
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    """
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding a node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


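# Illustrative sketch (field values are assumptions): LUNodeAdd is normally
# driven by an OpNodeAdd opcode; a plain add and a re-add differ only in the
# readd flag and in which attributes may be passed (no group on re-add):
#
#   add_op = opcodes.OpNodeAdd(node_name="node4.example.com",
#                              group="default", vm_capable=True)
#   readd_op = opcodes.OpNodeAdd(node_name="node4.example.com", readd=True)
#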
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested flag changes and other parameters against
    the node's current state.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


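# Illustrative sketch (not part of the original module): the _F2R/_R2F tables
# in LUNodeSetParams encode the node role as a bijection with the
# (master_candidate, drained, offline) flag tuple, so a role change can be
# applied with a simple reverse lookup:
#
#   old_role = LUNodeSetParams._F2R[(True, False, False)]   # _ROLE_CANDIDATE
#   new_flags = LUNodeSetParams._R2F[LUNodeSetParams._ROLE_DRAINED]
#   # new_flags == (False, True, False)
#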
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


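# Illustrative sketch (hypothetical caller, simplified): the two-pass scheme
# above first assembles every disk with is_primary=False on all nodes, and
# only then re-assembles on the primary node, narrowing the window in which
# DRBD could be promoted before both sides have connected:
#
#   disks_ok, dev_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   # dev_info is a list of (node, iv_name, device_path) tuples
#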
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


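# Illustrative usage sketch (assumed caller): _ExpandCheckDisks is the common
# guard used by the assemble/shutdown helpers -- passing None means "all
# disks", anything else must be a subset of instance.disks:
#
#   disks = _ExpandCheckDisks(instance, None)                 # all disks
#   disks = _ExpandCheckDisks(instance, instance.disks[:1])   # first disk only
#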
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node make the function report failure unless
  ignore_primary is true; errors on other nodes are ignored only if
  those nodes are marked offline.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


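# Illustrative sketch (hypothetical values): a typical pre-start memory check,
# as done by LUInstanceStartup below -- the requested amount comes from the
# filled-in BE_MEMORY backend parameter:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
#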
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


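# Illustrative sketch (hypothetical VG name and sizes): req_sizes maps each
# volume group to the total amount of disk, in MiB, needed on it, and the
# helper above simply checks every VG in turn via _CheckNodesFreeDiskOnVG:
#
#   req_sizes = {"xenvg": 2 * 1024}   # 2 GiB required in VG "xenvg"
#   _CheckNodesFreeDiskPerVG(self, [pnode, snode], req_sizes)
#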
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


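# Illustrative sketch (hypothetical override): the temporary hvparams and
# beparams accepted by LUInstanceStartup are validated with
# utils.ForceDictType and layered on top of the cluster-filled values before
# the hypervisor syntax check, roughly:
#
#   filled_hvp = cluster.FillHV(instance)
#   filled_hvp.update({"boot_order": "n"})   # assumed hypervisor-specific key
#   hypervisor.GetHypervisor(instance.hypervisor).CheckParameterSyntax(
#     filled_hvp)
#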
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


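# Illustrative decision table (summarizing the Exec logic above, not new
# behaviour): soft/hard reboots of a running instance are delegated to the
# hypervisor via call_instance_reboot; a full reboot (or any reboot of a
# stopped instance) is done by shutting down the instance and its disks,
# then starting both again:
#
#   running + INSTANCE_REBOOT_SOFT/HARD  -> call_instance_reboot
#   running + INSTANCE_REBOOT_FULL       -> shutdown, deactivate, start
#   stopped + any reboot type            -> activate disks, start
#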
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


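# Example (illustrative sketch, not part of the original code): a reinstall
# request that also switches the OS and overrides OS parameters would carry
# the fields checked above, e.g. something like
#   op = opcodes.OpInstanceReinstall(instance_name="inst1.example.com",
#                                    os_type="debootstrap+default",
#                                    osparams={"dhcp": "no"})
# where the os_type/osparams values are purely hypothetical examples.

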
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


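# Note (added for clarity, not in the original code): in the LU above an empty
# op.disks list means "recreate every disk" (it is expanded to all indices in
# CheckPrereq), while e.g. op.disks=[1] limits the recreation to disk 1 and
# leaves all other indices in the to_skip list built in Exec.

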
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    self.lock_all_nodes = False

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))

      if inst.disk_template in constants.DTS_EXT_MIRROR:
        # We need to lock all nodes, as the iallocator will choose the
        # destination nodes afterwards
        self.lock_all_nodes = True

    self.tasklets = tasklets

    # Declare node locks
    if self.lock_all_nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover timeout of the shutdown

  """
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance_name, self.lu.op.iallocator,
                 utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


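# Note (added for clarity, not in the original code): in TLMigrateInstance
# above, the live/non-live decision is resolved in CheckPrereq: an explicit
# opcode 'live' flag is translated into HT_MIGRATION_LIVE/NONLIVE, an explicit
# 'mode' is used as-is, and otherwise the hypervisor's HV_MIGRATION_MODE
# default applies; failovers always end up with self.live = False.

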
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


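# Note (added for clarity, not in the original code): _CreateBlockDev recurses
# into the children first and only then creates the device itself, and
# force_create becomes sticky once a device reports CreateOnSecondary(); for a
# DRBD8 disk this means both backing LVs are created before the DRBD device
# on top of them.

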
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


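# Example (illustrative, not in the original code): for two new disks starting
# at base_index 0, _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns the
# two extensions each prefixed with a freshly generated unique ID, which the
# callers below use as LV names (with "_data"/"_meta" suffixes for DRBD).

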
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


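
# (Illustrative sketch for _GenerateDRBD8Branch above; not part of the
# original module.)  The returned object is a small device tree:
#   LD_DRBD8  logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#    |- LD_LV  size=<size>  logical_id=(vgnames[0], names[0])   # data
#    `- LD_LV  size=128     logical_id=(vgnames[1], names[1])   # metadata
# The fixed 128 MiB metadata volume is why the DRBD8 size computations
# further down add 128 per disk.
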
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


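
# (Illustrative example for _GenerateDiskTemplate above; not part of the
# original module.)  A two-disk DT_DRBD8 request with base_index=0 allocates
# four DRBD minors (primary/secondary interleaved, two per disk) and builds
# LV names of the form "<uuid>.disk0_data"/"<uuid>.disk0_meta", the resulting
# devices carrying iv_name "disk/0" and "disk/1".  A DT_PLAIN request instead
# yields one LD_LV per disk, named "<uuid>.disk<N>", in the requested (or
# default) volume group.
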
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


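
# (Illustrative example for _CalcEta above; not part of the original module.)
# With 30 seconds spent writing 512 MiB out of 2048 MiB:
#   _CalcEta(30, 512, 2048) == (2048 - 512) * (30 / 512.0) == 90.0
# i.e. roughly a minute and a half remaining at the observed average rate.
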
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)


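
# (Illustrative example for _WipeDisks above; not part of the original
# module.)  Assuming, say, MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK =
# 1024 (MiB), a 2048 MiB disk is wiped in int(2048 / 100.0 * 10) = 204 MiB
# chunks, while very large disks are capped at 1024 MiB per
# call_blockdev_wipe RPC; progress and an ETA are logged at most once per
# minute per disk.
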
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


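
# (Illustrative note for _CreateDisks above; not part of the original
# module.)  For a DRBD8 instance with primary "node1" and secondary "node2"
# (hypothetical names), each disk results in two calls:
#   _CreateBlockDev(lu, "node1", inst, disk, True,  info, True)   # primary
#   _CreateBlockDev(lu, "node2", inst, disk, False, info, False)  # secondary
# i.e. creation and opening are only forced on the primary node; on the
# secondary the decision is left to each device's CreateOnSecondary().
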
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      # accumulate the requirement per volume group name
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


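
# (Illustrative example for _ComputeDiskSizePerVG above; not part of the
# original module.)  Two DRBD8 disks, 1024 MiB in VG "xenvg" and 2048 MiB in
# VG "fastvg" (hypothetical names), yield:
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "fastvg",
#                           constants.IDISK_SIZE: 2048}])
#   => {"xenvg": 1024 + 128, "fastvg": 2048 + 128}
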
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


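
# (Illustrative example for _ComputeDiskSize above; not part of the original
# module.)  For two disks of 1024 MiB and 2048 MiB:
#   _ComputeDiskSize(constants.DT_PLAIN, disks) => 1024 + 2048 = 3072
#   _ComputeDiskSize(constants.DT_DRBD8, disks) => (1024 + 128) + (2048 + 128)
#                                                = 3328
# while DT_FILE and DT_DISKLESS need no volume group space at all (None).
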
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
7467
  """Create an instance.
7468

7469
  """
7470
  HPATH = "instance-add"
7471
  HTYPE = constants.HTYPE_INSTANCE
7472
  REQ_BGL = False
7473

    
7474
  def CheckArguments(self):
7475
    """Check arguments.
7476

7477
    """
7478
    # do not require name_check to ease forward/backward compatibility
7479
    # for tools
7480
    if self.op.no_install and self.op.start:
7481
      self.LogInfo("No-installation mode selected, disabling startup")
7482
      self.op.start = False
7483
    # validate/normalize the instance name
7484
    self.op.instance_name = \
7485
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7486

    
7487
    if self.op.ip_check and not self.op.name_check:
7488
      # TODO: make the ip check more flexible and not depend on the name check
7489
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7490
                                 " check", errors.ECODE_INVAL)
7491

    
7492
    # check nics' parameter names
7493
    for nic in self.op.nics:
7494
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7495

    
7496
    # check disks. parameter names and consistent adopt/no-adopt strategy
7497
    has_adopt = has_no_adopt = False
7498
    for disk in self.op.disks:
7499
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7500
      if constants.IDISK_ADOPT in disk:
7501
        has_adopt = True
7502
      else:
7503
        has_no_adopt = True
7504
    if has_adopt and has_no_adopt:
7505
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7506
                                 errors.ECODE_INVAL)
7507
    if has_adopt:
7508
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7509
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7510
                                   " '%s' disk template" %
7511
                                   self.op.disk_template,
7512
                                   errors.ECODE_INVAL)
7513
      if self.op.iallocator is not None:
7514
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7515
                                   " iallocator script", errors.ECODE_INVAL)
7516
      if self.op.mode == constants.INSTANCE_IMPORT:
7517
        raise errors.OpPrereqError("Disk adoption not allowed for"
7518
                                   " instance import", errors.ECODE_INVAL)
7519
    else:
7520
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7521
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7522
                                   " but no 'adopt' parameter given" %
7523
                                   self.op.disk_template,
7524
                                   errors.ECODE_INVAL)
7525

    
7526
    self.adopt_disks = has_adopt
7527

    
7528
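    # (Illustrative note; not part of the original module.)  Disk adoption is
    # all-or-nothing: either every entry in self.op.disks carries
    # constants.IDISK_ADOPT or none does; a mixed list is rejected above with
    # ECODE_INVAL.  Adoption is further restricted to the templates in
    # constants.DTS_MAY_ADOPT, is incompatible with an iallocator and cannot
    # be used for instance imports.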
    # instance name verification
7529
    if self.op.name_check:
7530
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7531
      self.op.instance_name = self.hostname1.name
7532
      # used in CheckPrereq for ip ping check
7533
      self.check_ip = self.hostname1.ip
7534
    else:
7535
      self.check_ip = None
7536

    
7537
    # file storage checks
7538
    if (self.op.file_driver and
7539
        not self.op.file_driver in constants.FILE_DRIVER):
7540
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7541
                                 self.op.file_driver, errors.ECODE_INVAL)
7542

    
7543
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7544
      raise errors.OpPrereqError("File storage directory path not absolute",
7545
                                 errors.ECODE_INVAL)
7546

    
7547
    ### Node/iallocator related checks
7548
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7549

    
7550
    if self.op.pnode is not None:
7551
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7552
        if self.op.snode is None:
7553
          raise errors.OpPrereqError("The networked disk templates need"
7554
                                     " a mirror node", errors.ECODE_INVAL)
7555
      elif self.op.snode:
7556
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7557
                        " template")
7558
        self.op.snode = None
7559

    
7560
    self._cds = _GetClusterDomainSecret()
7561

    
7562
    if self.op.mode == constants.INSTANCE_IMPORT:
7563
      # On import force_variant must be True, because if we forced it at
7564
      # initial install, our only chance when importing it back is that it
7565
      # works again!
7566
      self.op.force_variant = True
7567

    
7568
      if self.op.no_install:
7569
        self.LogInfo("No-installation mode has no effect during import")
7570

    
7571
    elif self.op.mode == constants.INSTANCE_CREATE:
7572
      if self.op.os_type is None:
7573
        raise errors.OpPrereqError("No guest OS specified",
7574
                                   errors.ECODE_INVAL)
7575
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7576
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7577
                                   " installation" % self.op.os_type,
7578
                                   errors.ECODE_STATE)
7579
      if self.op.disk_template is None:
7580
        raise errors.OpPrereqError("No disk template specified",
7581
                                   errors.ECODE_INVAL)
7582

    
7583
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7584
      # Check handshake to ensure both clusters have the same domain secret
7585
      src_handshake = self.op.source_handshake
7586
      if not src_handshake:
7587
        raise errors.OpPrereqError("Missing source handshake",
7588
                                   errors.ECODE_INVAL)
7589

    
7590
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7591
                                                           src_handshake)
7592
      if errmsg:
7593
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7594
                                   errors.ECODE_INVAL)
7595

    
7596
      # Load and check source CA
7597
      self.source_x509_ca_pem = self.op.source_x509_ca
7598
      if not self.source_x509_ca_pem:
7599
        raise errors.OpPrereqError("Missing source X509 CA",
7600
                                   errors.ECODE_INVAL)
7601

    
7602
      try:
7603
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7604
                                                    self._cds)
7605
      except OpenSSL.crypto.Error, err:
7606
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7607
                                   (err, ), errors.ECODE_INVAL)
7608

    
7609
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7610
      if errcode is not None:
7611
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7612
                                   errors.ECODE_INVAL)
7613

    
7614
      self.source_x509_ca = cert
7615

    
7616
      src_instance_name = self.op.source_instance_name
7617
      if not src_instance_name:
7618
        raise errors.OpPrereqError("Missing source instance name",
7619
                                   errors.ECODE_INVAL)
7620

    
7621
      self.source_instance_name = \
7622
          netutils.GetHostname(name=src_instance_name).name
7623

    
7624
    else:
7625
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7626
                                 self.op.mode, errors.ECODE_INVAL)
7627

    
7628
  def ExpandNames(self):
7629
    """ExpandNames for CreateInstance.
7630

7631
    Figure out the right locks for instance creation.
7632

7633
    """
7634
    self.needed_locks = {}
7635

    
7636
    instance_name = self.op.instance_name
7637
    # this is just a preventive check, but someone might still add this
7638
    # instance in the meantime, and creation will fail at lock-add time
7639
    if instance_name in self.cfg.GetInstanceList():
7640
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7641
                                 instance_name, errors.ECODE_EXISTS)
7642

    
7643
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7644

    
7645
    if self.op.iallocator:
7646
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7647
    else:
7648
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7649
      nodelist = [self.op.pnode]
7650
      if self.op.snode is not None:
7651
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7652
        nodelist.append(self.op.snode)
7653
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7654

    
7655
    # in case of import lock the source node too
7656
    if self.op.mode == constants.INSTANCE_IMPORT:
7657
      src_node = self.op.src_node
7658
      src_path = self.op.src_path
7659

    
7660
      if src_path is None:
7661
        self.op.src_path = src_path = self.op.instance_name
7662

    
7663
      if src_node is None:
7664
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7665
        self.op.src_node = None
7666
        if os.path.isabs(src_path):
7667
          raise errors.OpPrereqError("Importing an instance from an absolute"
7668
                                     " path requires a source node option",
7669
                                     errors.ECODE_INVAL)
7670
      else:
7671
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7672
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7673
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7674
        if not os.path.isabs(src_path):
7675
          self.op.src_path = src_path = \
7676
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7677

    
7678
  def _RunAllocator(self):
7679
    """Run the allocator based on input opcode.
7680

7681
    """
7682
    nics = [n.ToDict() for n in self.nics]
7683
    ial = IAllocator(self.cfg, self.rpc,
7684
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7685
                     name=self.op.instance_name,
7686
                     disk_template=self.op.disk_template,
7687
                     tags=[],
7688
                     os=self.op.os_type,
7689
                     vcpus=self.be_full[constants.BE_VCPUS],
7690
                     mem_size=self.be_full[constants.BE_MEMORY],
7691
                     disks=self.disks,
7692
                     nics=nics,
7693
                     hypervisor=self.op.hypervisor,
7694
                     )
7695

    
7696
    ial.Run(self.op.iallocator)
7697

    
7698
    if not ial.success:
7699
      raise errors.OpPrereqError("Can't compute nodes using"
7700
                                 " iallocator '%s': %s" %
7701
                                 (self.op.iallocator, ial.info),
7702
                                 errors.ECODE_NORES)
7703
    if len(ial.result) != ial.required_nodes:
7704
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7705
                                 " of nodes (%s), required %s" %
7706
                                 (self.op.iallocator, len(ial.result),
7707
                                  ial.required_nodes), errors.ECODE_FAULT)
7708
    self.op.pnode = ial.result[0]
7709
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7710
                 self.op.instance_name, self.op.iallocator,
7711
                 utils.CommaJoin(ial.result))
7712
    if ial.required_nodes == 2:
7713
      self.op.snode = ial.result[1]
7714

    
7715
  def BuildHooksEnv(self):
7716
    """Build hooks env.
7717

7718
    This runs on master, primary and secondary nodes of the instance.
7719

7720
    """
7721
    env = {
7722
      "ADD_MODE": self.op.mode,
7723
      }
7724
    if self.op.mode == constants.INSTANCE_IMPORT:
7725
      env["SRC_NODE"] = self.op.src_node
7726
      env["SRC_PATH"] = self.op.src_path
7727
      env["SRC_IMAGES"] = self.src_images
7728

    
7729
    env.update(_BuildInstanceHookEnv(
7730
      name=self.op.instance_name,
7731
      primary_node=self.op.pnode,
7732
      secondary_nodes=self.secondaries,
7733
      status=self.op.start,
7734
      os_type=self.op.os_type,
7735
      memory=self.be_full[constants.BE_MEMORY],
7736
      vcpus=self.be_full[constants.BE_VCPUS],
7737
      nics=_NICListToTuple(self, self.nics),
7738
      disk_template=self.op.disk_template,
7739
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7740
             for d in self.disks],
7741
      bep=self.be_full,
7742
      hvp=self.hv_full,
7743
      hypervisor_name=self.op.hypervisor,
7744
    ))
7745

    
7746
    return env
7747

    
7748
  def BuildHooksNodes(self):
7749
    """Build hooks nodes.
7750

7751
    """
7752
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7753
    return nl, nl
7754

    
7755
  def _ReadExportInfo(self):
7756
    """Reads the export information from disk.
7757

7758
    It will override the opcode source node and path with the actual
7759
    information, if these two were not specified before.
7760

7761
    @return: the export information
7762

7763
    """
7764
    assert self.op.mode == constants.INSTANCE_IMPORT
7765

    
7766
    src_node = self.op.src_node
7767
    src_path = self.op.src_path
7768

    
7769
    if src_node is None:
7770
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
7771
      exp_list = self.rpc.call_export_list(locked_nodes)
7772
      found = False
7773
      for node in exp_list:
7774
        if exp_list[node].fail_msg:
7775
          continue
7776
        if src_path in exp_list[node].payload:
7777
          found = True
7778
          self.op.src_node = src_node = node
7779
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7780
                                                       src_path)
7781
          break
7782
      if not found:
7783
        raise errors.OpPrereqError("No export found for relative path %s" %
7784
                                    src_path, errors.ECODE_INVAL)
7785

    
7786
    _CheckNodeOnline(self, src_node)
7787
    result = self.rpc.call_export_info(src_node, src_path)
7788
    result.Raise("No export or invalid export found in dir %s" % src_path)
7789

    
7790
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7791
    if not export_info.has_section(constants.INISECT_EXP):
7792
      raise errors.ProgrammerError("Corrupted export config",
7793
                                   errors.ECODE_ENVIRON)
7794

    
7795
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7796
    if (int(ei_version) != constants.EXPORT_VERSION):
7797
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7798
                                 (ei_version, constants.EXPORT_VERSION),
7799
                                 errors.ECODE_ENVIRON)
7800
    return export_info
7801

    
7802
  def _ReadExportParams(self, einfo):
7803
    """Use export parameters as defaults.
7804

7805
    In case the opcode doesn't specify (as in override) some instance
7806
    parameters, then try to use them from the export information, if
7807
    that declares them.
7808

7809
    """
7810
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7811

    
7812
    if self.op.disk_template is None:
7813
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7814
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7815
                                          "disk_template")
7816
      else:
7817
        raise errors.OpPrereqError("No disk template specified and the export"
7818
                                   " is missing the disk_template information",
7819
                                   errors.ECODE_INVAL)
7820

    
7821
    if not self.op.disks:
7822
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7823
        disks = []
7824
        # TODO: import the disk iv_name too
7825
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7826
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7827
          disks.append({constants.IDISK_SIZE: disk_sz})
7828
        self.op.disks = disks
7829
      else:
7830
        raise errors.OpPrereqError("No disk info specified and the export"
7831
                                   " is missing the disk information",
7832
                                   errors.ECODE_INVAL)
7833

    
7834
    if (not self.op.nics and
7835
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7836
      nics = []
7837
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7838
        ndict = {}
7839
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7840
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7841
          ndict[name] = v
7842
        nics.append(ndict)
7843
      self.op.nics = nics
7844

    
7845
    if (self.op.hypervisor is None and
7846
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7847
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7848
    if einfo.has_section(constants.INISECT_HYP):
7849
      # use the export parameters but do not override the ones
7850
      # specified by the user
7851
      for name, value in einfo.items(constants.INISECT_HYP):
7852
        if name not in self.op.hvparams:
7853
          self.op.hvparams[name] = value
7854

    
7855
    if einfo.has_section(constants.INISECT_BEP):
7856
      # use the parameters, without overriding
7857
      for name, value in einfo.items(constants.INISECT_BEP):
7858
        if name not in self.op.beparams:
7859
          self.op.beparams[name] = value
7860
    else:
7861
      # try to read the parameters old style, from the main section
7862
      for name in constants.BES_PARAMETERS:
7863
        if (name not in self.op.beparams and
7864
            einfo.has_option(constants.INISECT_INS, name)):
7865
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7866

    
7867
    if einfo.has_section(constants.INISECT_OSP):
7868
      # use the parameters, without overriding
7869
      for name, value in einfo.items(constants.INISECT_OSP):
7870
        if name not in self.op.osparams:
7871
          self.op.osparams[name] = value
7872

    
7873
  def _RevertToDefaults(self, cluster):
7874
    """Revert the instance parameters to the default values.
7875

7876
    """
7877
    # hvparams
7878
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7879
    for name in self.op.hvparams.keys():
7880
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7881
        del self.op.hvparams[name]
7882
    # beparams
7883
    be_defs = cluster.SimpleFillBE({})
7884
    for name in self.op.beparams.keys():
7885
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7886
        del self.op.beparams[name]
7887
    # nic params
7888
    nic_defs = cluster.SimpleFillNIC({})
7889
    for nic in self.op.nics:
7890
      for name in constants.NICS_PARAMETERS:
7891
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7892
          del nic[name]
7893
    # osparams
7894
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7895
    for name in self.op.osparams.keys():
7896
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7897
        del self.op.osparams[name]
7898

    
7899
  def CheckPrereq(self):
7900
    """Check prerequisites.
7901

7902
    """
7903
    if self.op.mode == constants.INSTANCE_IMPORT:
7904
      export_info = self._ReadExportInfo()
7905
      self._ReadExportParams(export_info)
7906

    
7907
    if (not self.cfg.GetVGName() and
7908
        self.op.disk_template not in constants.DTS_NOT_LVM):
7909
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7910
                                 " instances", errors.ECODE_STATE)
7911

    
7912
    if self.op.hypervisor is None:
7913
      self.op.hypervisor = self.cfg.GetHypervisorType()
7914

    
7915
    cluster = self.cfg.GetClusterInfo()
7916
    enabled_hvs = cluster.enabled_hypervisors
7917
    if self.op.hypervisor not in enabled_hvs:
7918
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7919
                                 " cluster (%s)" % (self.op.hypervisor,
7920
                                  ",".join(enabled_hvs)),
7921
                                 errors.ECODE_STATE)
7922

    
7923
    # check hypervisor parameter syntax (locally)
7924
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7925
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7926
                                      self.op.hvparams)
7927
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7928
    hv_type.CheckParameterSyntax(filled_hvp)
7929
    self.hv_full = filled_hvp
7930
    # check that we don't specify global parameters on an instance
7931
    _CheckGlobalHvParams(self.op.hvparams)
7932

    
7933
    # fill and remember the beparams dict
7934
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7935
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7936

    
7937
    # build os parameters
7938
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7939

    
7940
    # now that hvp/bep are in final format, let's reset to defaults,
7941
    # if told to do so
7942
    if self.op.identify_defaults:
7943
      self._RevertToDefaults(cluster)
7944

    
7945
    # NIC buildup
7946
    self.nics = []
7947
    for idx, nic in enumerate(self.op.nics):
7948
      nic_mode_req = nic.get(constants.INIC_MODE, None)
7949
      nic_mode = nic_mode_req
7950
      if nic_mode is None:
7951
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7952

    
7953
      # in routed mode, for the first nic, the default ip is 'auto'
7954
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7955
        default_ip_mode = constants.VALUE_AUTO
7956
      else:
7957
        default_ip_mode = constants.VALUE_NONE
7958

    
7959
      # ip validity checks
7960
      ip = nic.get(constants.INIC_IP, default_ip_mode)
7961
      if ip is None or ip.lower() == constants.VALUE_NONE:
7962
        nic_ip = None
7963
      elif ip.lower() == constants.VALUE_AUTO:
7964
        if not self.op.name_check:
7965
          raise errors.OpPrereqError("IP address set to auto but name checks"
7966
                                     " have been skipped",
7967
                                     errors.ECODE_INVAL)
7968
        nic_ip = self.hostname1.ip
7969
      else:
7970
        if not netutils.IPAddress.IsValid(ip):
7971
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7972
                                     errors.ECODE_INVAL)
7973
        nic_ip = ip
7974

    
7975
      # TODO: check the ip address for uniqueness
7976
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7977
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7978
                                   errors.ECODE_INVAL)
7979

    
7980
      # MAC address verification
7981
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7982
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7983
        mac = utils.NormalizeAndValidateMac(mac)
7984

    
7985
        try:
7986
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7987
        except errors.ReservationError:
7988
          raise errors.OpPrereqError("MAC address %s already in use"
7989
                                     " in cluster" % mac,
7990
                                     errors.ECODE_NOTUNIQUE)
7991

    
7992
      #  Build nic parameters
7993
      link = nic.get(constants.INIC_LINK, None)
7994
      nicparams = {}
7995
      if nic_mode_req:
7996
        nicparams[constants.NIC_MODE] = nic_mode_req
7997
      if link:
7998
        nicparams[constants.NIC_LINK] = link
7999

    
8000
      check_params = cluster.SimpleFillNIC(nicparams)
8001
      objects.NIC.CheckParameterSyntax(check_params)
8002
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8003

    
8004
    # disk checks/pre-build
8005
    default_vg = self.cfg.GetVGName()
8006
    self.disks = []
8007
    for disk in self.op.disks:
8008
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8009
      if mode not in constants.DISK_ACCESS_SET:
8010
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8011
                                   mode, errors.ECODE_INVAL)
8012
      size = disk.get(constants.IDISK_SIZE, None)
8013
      if size is None:
8014
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8015
      try:
8016
        size = int(size)
8017
      except (TypeError, ValueError):
8018
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8019
                                   errors.ECODE_INVAL)
8020

    
8021
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8022
      new_disk = {
8023
        constants.IDISK_SIZE: size,
8024
        constants.IDISK_MODE: mode,
8025
        constants.IDISK_VG: data_vg,
8026
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8027
        }
8028
      if constants.IDISK_ADOPT in disk:
8029
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8030
      self.disks.append(new_disk)
8031

    
8032
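    # (Illustrative note; not part of the original module.)  After the loop
    # above, each entry of self.disks is fully normalized; e.g. a request of
    # {constants.IDISK_SIZE: "10240"} with default VG "xenvg" (hypothetical)
    # becomes:
    #   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
    #    constants.IDISK_VG: "xenvg", constants.IDISK_METAVG: "xenvg"}
    # with constants.IDISK_ADOPT carried over only when it was given.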
    if self.op.mode == constants.INSTANCE_IMPORT:
8033

    
8034
      # Check that the new instance doesn't have less disks than the export
8035
      instance_disks = len(self.disks)
8036
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8037
      if instance_disks < export_disks:
8038
        raise errors.OpPrereqError("Not enough disks to import."
8039
                                   " (instance: %d, export: %d)" %
8040
                                   (instance_disks, export_disks),
8041
                                   errors.ECODE_INVAL)
8042

    
8043
      disk_images = []
8044
      for idx in range(export_disks):
8045
        option = 'disk%d_dump' % idx
8046
        if export_info.has_option(constants.INISECT_INS, option):
8047
          # FIXME: are the old os-es, disk sizes, etc. useful?
8048
          export_name = export_info.get(constants.INISECT_INS, option)
8049
          image = utils.PathJoin(self.op.src_path, export_name)
8050
          disk_images.append(image)
8051
        else:
8052
          disk_images.append(False)
8053

    
8054
      self.src_images = disk_images
8055

    
8056
      old_name = export_info.get(constants.INISECT_INS, 'name')
8057
      try:
8058
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8059
      except (TypeError, ValueError), err:
8060
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8061
                                   " an integer: %s" % str(err),
8062
                                   errors.ECODE_STATE)
8063
      if self.op.instance_name == old_name:
8064
        for idx, nic in enumerate(self.nics):
8065
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8066
            nic_mac_ini = 'nic%d_mac' % idx
8067
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8068

    
8069
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8070

    
8071
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8072
    if self.op.ip_check:
8073
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8074
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8075
                                   (self.check_ip, self.op.instance_name),
8076
                                   errors.ECODE_NOTUNIQUE)
8077

    
8078
    #### mac address generation
8079
    # By generating here the mac address both the allocator and the hooks get
8080
    # the real final mac address rather than the 'auto' or 'generate' value.
8081
    # There is a race condition between the generation and the instance object
8082
    # creation, which means that we know the mac is valid now, but we're not
8083
    # sure it will be when we actually add the instance. If things go bad
8084
    # adding the instance will abort because of a duplicate mac, and the
8085
    # creation job will fail.
8086
    for nic in self.nics:
8087
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8088
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8089

    
8090
    #### allocator run
8091

    
8092
    if self.op.iallocator is not None:
8093
      self._RunAllocator()
8094

    
8095
    #### node related checks
8096

    
8097
    # check primary node
8098
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8099
    assert self.pnode is not None, \
8100
      "Cannot retrieve locked node %s" % self.op.pnode
8101
    if pnode.offline:
8102
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8103
                                 pnode.name, errors.ECODE_STATE)
8104
    if pnode.drained:
8105
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8106
                                 pnode.name, errors.ECODE_STATE)
8107
    if not pnode.vm_capable:
8108
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8109
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8110

    
8111
    self.secondaries = []
8112

    
8113
    # mirror node verification
8114
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8115
      if self.op.snode == pnode.name:
8116
        raise errors.OpPrereqError("The secondary node cannot be the"
8117
                                   " primary node", errors.ECODE_INVAL)
8118
      _CheckNodeOnline(self, self.op.snode)
8119
      _CheckNodeNotDrained(self, self.op.snode)
8120
      _CheckNodeVmCapable(self, self.op.snode)
8121
      self.secondaries.append(self.op.snode)
8122

    
8123
    nodenames = [pnode.name] + self.secondaries
8124

    
8125
    if not self.adopt_disks:
8126
      # Check lv size requirements, if not adopting
8127
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8128
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8129

    
8130
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8131
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8132
                                disk[constants.IDISK_ADOPT])
8133
                     for disk in self.disks])
8134
      if len(all_lvs) != len(self.disks):
8135
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8136
                                   errors.ECODE_INVAL)
8137
      for lv_name in all_lvs:
8138
        try:
8139
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8140
          # to ReserveLV uses the same syntax
8141
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8142
        except errors.ReservationError:
8143
          raise errors.OpPrereqError("LV named %s used by another instance" %
8144
                                     lv_name, errors.ECODE_NOTUNIQUE)
8145

    
8146
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8147
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8148

    
8149
      node_lvs = self.rpc.call_lv_list([pnode.name],
8150
                                       vg_names.payload.keys())[pnode.name]
8151
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8152
      node_lvs = node_lvs.payload
8153

    
8154
      delta = all_lvs.difference(node_lvs.keys())
8155
      if delta:
8156
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8157
                                   utils.CommaJoin(delta),
8158
                                   errors.ECODE_INVAL)
8159
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8160
      if online_lvs:
8161
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8162
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8163
                                   errors.ECODE_STATE)
8164
      # update the size of disk based on what is found
8165
      for dsk in self.disks:
8166
        dsk[constants.IDISK_SIZE] = \
8167
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8168
                                        dsk[constants.IDISK_ADOPT])][0]))
8169

    
8170
    elif self.op.disk_template == constants.DT_BLOCK:
8171
      # Normalize and de-duplicate device paths
8172
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8173
                       for disk in self.disks])
8174
      if len(all_disks) != len(self.disks):
8175
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8176
                                   errors.ECODE_INVAL)
8177
      baddisks = [d for d in all_disks
8178
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8179
      if baddisks:
8180
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8181
                                   " cannot be adopted" %
8182
                                   (", ".join(baddisks),
8183
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8184
                                   errors.ECODE_INVAL)
8185

    
8186
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8187
                                            list(all_disks))[pnode.name]
8188
      node_disks.Raise("Cannot get block device information from node %s" %
8189
                       pnode.name)
8190
      node_disks = node_disks.payload
8191
      delta = all_disks.difference(node_disks.keys())
8192
      if delta:
8193
        raise errors.OpPrereqError("Missing block device(s): %s" %
8194
                                   utils.CommaJoin(delta),
8195
                                   errors.ECODE_INVAL)
8196
      for dsk in self.disks:
8197
        dsk[constants.IDISK_SIZE] = \
8198
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8199

    
8200
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8201

    
8202
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8203
    # check OS parameters (remotely)
8204
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8205

    
8206
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8207

    
8208
    # memory check on primary node
8209
    if self.op.start:
8210
      _CheckNodeFreeMemory(self, self.pnode.name,
8211
                           "creating instance %s" % self.op.instance_name,
8212
                           self.be_full[constants.BE_MEMORY],
8213
                           self.op.hypervisor)
8214

    
8215
    self.dry_run_result = list(nodenames)
8216

    
8217
  def Exec(self, feedback_fn):
8218
    """Create and add the instance to the cluster.
8219

8220
    """
8221
    instance = self.op.instance_name
8222
    pnode_name = self.pnode.name
8223

    
8224
    ht_kind = self.op.hypervisor
8225
    if ht_kind in constants.HTS_REQ_PORT:
8226
      network_port = self.cfg.AllocatePort()
8227
    else:
8228
      network_port = None
8229

    
8230
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8231
      # this is needed because os.path.join does not accept None arguments
8232
      if self.op.file_storage_dir is None:
8233
        string_file_storage_dir = ""
8234
      else:
8235
        string_file_storage_dir = self.op.file_storage_dir
8236

    
8237
      # build the full file storage dir path
8238
      if self.op.disk_template == constants.DT_SHARED_FILE:
8239
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8240
      else:
8241
        get_fsd_fn = self.cfg.GetFileStorageDir
8242

    
8243
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
8244
                                        string_file_storage_dir, instance)
8245
    else:
8246
      file_storage_dir = ""
8247

    
8248
    disks = _GenerateDiskTemplate(self,
8249
                                  self.op.disk_template,
8250
                                  instance, pnode_name,
8251
                                  self.secondaries,
8252
                                  self.disks,
8253
                                  file_storage_dir,
8254
                                  self.op.file_driver,
8255
                                  0,
8256
                                  feedback_fn)
8257

    
8258
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8259
                            primary_node=pnode_name,
8260
                            nics=self.nics, disks=disks,
8261
                            disk_template=self.op.disk_template,
8262
                            admin_up=False,
8263
                            network_port=network_port,
8264
                            beparams=self.op.beparams,
8265
                            hvparams=self.op.hvparams,
8266
                            hypervisor=self.op.hypervisor,
8267
                            osparams=self.op.osparams,
8268
                            )
8269

    
8270
    if self.adopt_disks:
8271
      if self.op.disk_template == constants.DT_PLAIN:
8272
        # rename LVs to the newly-generated names; we need to construct
8273
        # 'fake' LV disks with the old data, plus the new unique_id
8274
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8275
        rename_to = []
8276
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8277
          rename_to.append(t_dsk.logical_id)
8278
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8279
          self.cfg.SetDiskID(t_dsk, pnode_name)
8280
        result = self.rpc.call_blockdev_rename(pnode_name,
8281
                                               zip(tmp_disks, rename_to))
8282
        result.Raise("Failed to rename adoped LVs")
8283
    else:
8284
      feedback_fn("* creating instance disks...")
8285
      try:
8286
        _CreateDisks(self, iobj)
8287
      except errors.OpExecError:
8288
        self.LogWarning("Device creation failed, reverting...")
8289
        try:
8290
          _RemoveDisks(self, iobj)
8291
        finally:
8292
          self.cfg.ReleaseDRBDMinors(instance)
8293
          raise
8294

    
8295
    feedback_fn("adding instance %s to cluster config" % instance)
8296

    
8297
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8298

    
8299
    # Declare that we don't want to remove the instance lock anymore, as we've
8300
    # added the instance to the config
8301
    del self.remove_locks[locking.LEVEL_INSTANCE]
8302

    
8303
    if self.op.mode == constants.INSTANCE_IMPORT:
8304
      # Release unused nodes
8305
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8306
    else:
8307
      # Release all nodes
8308
      _ReleaseLocks(self, locking.LEVEL_NODE)
8309

    
8310
    disk_abort = False
8311
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8312
      feedback_fn("* wiping instance disks...")
8313
      try:
8314
        _WipeDisks(self, iobj)
8315
      except errors.OpExecError, err:
8316
        logging.exception("Wiping disks failed")
8317
        self.LogWarning("Wiping instance disks failed (%s)", err)
8318
        disk_abort = True
8319

    
8320
    if disk_abort:
8321
      # Something is already wrong with the disks, don't do anything else
8322
      pass
8323
    elif self.op.wait_for_sync:
8324
      disk_abort = not _WaitForSync(self, iobj)
8325
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8326
      # make sure the disks are not degraded (still sync-ing is ok)
8327
      time.sleep(15)
8328
      feedback_fn("* checking mirrors status")
8329
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8330
    else:
8331
      disk_abort = False
8332

    
8333
    if disk_abort:
8334
      _RemoveDisks(self, iobj)
8335
      self.cfg.RemoveInstance(iobj.name)
8336
      # Make sure the instance lock gets removed
8337
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8338
      raise errors.OpExecError("There are some degraded disks for"
8339
                               " this instance")
8340

    
8341
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8342
      if self.op.mode == constants.INSTANCE_CREATE:
8343
        if not self.op.no_install:
8344
          feedback_fn("* running the instance OS create scripts...")
8345
          # FIXME: pass debug option from opcode to backend
8346
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8347
                                                 self.op.debug_level)
8348
          result.Raise("Could not add os for instance %s"
8349
                       " on node %s" % (instance, pnode_name))
8350

    
8351
      elif self.op.mode == constants.INSTANCE_IMPORT:
8352
        feedback_fn("* running the instance OS import scripts...")
8353

    
8354
        transfers = []
8355

    
8356
        for idx, image in enumerate(self.src_images):
8357
          if not image:
8358
            continue
8359

    
8360
          # FIXME: pass debug option from opcode to backend
8361
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8362
                                             constants.IEIO_FILE, (image, ),
8363
                                             constants.IEIO_SCRIPT,
8364
                                             (iobj.disks[idx], idx),
8365
                                             None)
8366
          transfers.append(dt)
8367

    
8368
        import_result = \
8369
          masterd.instance.TransferInstanceData(self, feedback_fn,
8370
                                                self.op.src_node, pnode_name,
8371
                                                self.pnode.secondary_ip,
8372
                                                iobj, transfers)
8373
        if not compat.all(import_result):
8374
          self.LogWarning("Some disks for instance %s on node %s were not"
8375
                          " imported successfully" % (instance, pnode_name))
8376

    
8377
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8378
        feedback_fn("* preparing remote import...")
8379
        # The source cluster will stop the instance before attempting to make a
8380
        # connection. In some cases stopping an instance can take a long time,
8381
        # hence the shutdown timeout is added to the connection timeout.
8382
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8383
                           self.op.source_shutdown_timeout)
8384
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
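        # Worked example (numbers are illustrative, not the actual
        # constants): with a connect timeout of 60s and a
        # source_shutdown_timeout of 120s, the remote import allows up to
        # 180s for the source cluster to stop the instance and connect back.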
8385

    
8386
        assert iobj.primary_node == self.pnode.name
8387
        disk_results = \
8388
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8389
                                        self.source_x509_ca,
8390
                                        self._cds, timeouts)
8391
        if not compat.all(disk_results):
8392
          # TODO: Should the instance still be started, even if some disks
8393
          # failed to import (valid for local imports, too)?
8394
          self.LogWarning("Some disks for instance %s on node %s were not"
8395
                          " imported successfully" % (instance, pnode_name))
8396

    
8397
        # Run rename script on newly imported instance
8398
        assert iobj.name == instance
8399
        feedback_fn("Running rename script for %s" % instance)
8400
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8401
                                                   self.source_instance_name,
8402
                                                   self.op.debug_level)
8403
        if result.fail_msg:
8404
          self.LogWarning("Failed to run rename script for %s on node"
8405
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8406

    
8407
      else:
8408
        # also checked in the prereq part
8409
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8410
                                     % self.op.mode)
8411

    
8412
    if self.op.start:
8413
      iobj.admin_up = True
8414
      self.cfg.Update(iobj, feedback_fn)
8415
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8416
      feedback_fn("* starting instance...")
8417
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8418
      result.Raise("Could not start instance")
8419

    
8420
    return list(iobj.all_nodes)
8421

    
8422

    
8423
class LUInstanceConsole(NoHooksLU):
8424
  """Connect to an instance's console.
8425

8426
  This is somewhat special in that it returns the command line that
8427
  you need to run on the master node in order to connect to the
8428
  console.
8429

8430
  """
8431
  REQ_BGL = False
8432

    
8433
  def ExpandNames(self):
8434
    self._ExpandAndLockInstance()
8435

    
8436
  def CheckPrereq(self):
8437
    """Check prerequisites.
8438

8439
    This checks that the instance is in the cluster.
8440

8441
    """
8442
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8443
    assert self.instance is not None, \
8444
      "Cannot retrieve locked instance %s" % self.op.instance_name
8445
    _CheckNodeOnline(self, self.instance.primary_node)
8446

    
8447
  def Exec(self, feedback_fn):
8448
    """Connect to the console of an instance
8449

8450
    """
8451
    instance = self.instance
8452
    node = instance.primary_node
8453

    
8454
    node_insts = self.rpc.call_instance_list([node],
8455
                                             [instance.hypervisor])[node]
8456
    node_insts.Raise("Can't get node information from %s" % node)
8457

    
8458
    if instance.name not in node_insts.payload:
8459
      if instance.admin_up:
8460
        state = constants.INSTST_ERRORDOWN
8461
      else:
8462
        state = constants.INSTST_ADMINDOWN
8463
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8464
                               (instance.name, state))
8465

    
8466
    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
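
# Illustrative sketch, not used anywhere in this module: how the console
# description could be obtained by a caller holding a config instance.  The
# "lu" object and the instance name are hypothetical; the exact keys of the
# returned dict depend on the hypervisor's GetInstanceConsole implementation.
#
#   cluster = lu.cfg.GetClusterInfo()
#   instance = lu.cfg.GetInstanceInfo("inst1.example.com")
#   console = _GetInstanceConsole(cluster, instance)
#   # "console" is a plain dict, suitable for returning over LUXI/RAPI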


class LUInstanceReplaceDisks(LogicalUnit):
8493
  """Replace the disks of an instance.
8494

8495
  """
8496
  HPATH = "mirrors-replace"
8497
  HTYPE = constants.HTYPE_INSTANCE
8498
  REQ_BGL = False
8499

    
8500
  def CheckArguments(self):
8501
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8502
                                  self.op.iallocator)
8503

    
8504
  def ExpandNames(self):
8505
    self._ExpandAndLockInstance()
8506

    
8507
    assert locking.LEVEL_NODE not in self.needed_locks
8508
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8509

    
8510
    assert self.op.iallocator is None or self.op.remote_node is None, \
8511
      "Conflicting options"
8512

    
8513
    if self.op.remote_node is not None:
8514
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8515

    
8516
      # Warning: do not remove the locking of the new secondary here
8517
      # unless DRBD8.AddChildren is changed to work in parallel;
8518
      # currently it doesn't since parallel invocations of
8519
      # FindUnusedMinor will conflict
8520
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8521
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8522
    else:
8523
      self.needed_locks[locking.LEVEL_NODE] = []
8524
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8525

    
8526
      if self.op.iallocator is not None:
8527
        # iallocator will select a new node in the same group
8528
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
8529

    
8530
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8531
                                   self.op.iallocator, self.op.remote_node,
8532
                                   self.op.disks, False, self.op.early_release)
8533

    
8534
    self.tasklets = [self.replacer]
8535

    
8536
  def DeclareLocks(self, level):
8537
    if level == locking.LEVEL_NODEGROUP:
8538
      assert self.op.remote_node is None
8539
      assert self.op.iallocator is not None
8540
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8541

    
8542
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
8543
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
8544
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8545

    
8546
    elif level == locking.LEVEL_NODE:
8547
      if self.op.iallocator is not None:
8548
        assert self.op.remote_node is None
8549
        assert not self.needed_locks[locking.LEVEL_NODE]
8550

    
8551
        # Lock member nodes of all locked groups
8552
        self.needed_locks[locking.LEVEL_NODE] = [node_name
8553
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8554
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8555
      else:
8556
        self._LockInstancesNodes()
8557

    
8558
  def BuildHooksEnv(self):
8559
    """Build hooks env.
8560

8561
    This runs on the master, the primary and all the secondaries.
8562

8563
    """
8564
    instance = self.replacer.instance
8565
    env = {
8566
      "MODE": self.op.mode,
8567
      "NEW_SECONDARY": self.op.remote_node,
8568
      "OLD_SECONDARY": instance.secondary_nodes[0],
8569
      }
8570
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8571
    return env
8572

    
8573
  def BuildHooksNodes(self):
8574
    """Build hooks nodes.
8575

8576
    """
8577
    instance = self.replacer.instance
8578
    nl = [
8579
      self.cfg.GetMasterNode(),
8580
      instance.primary_node,
8581
      ]
8582
    if self.op.remote_node is not None:
8583
      nl.append(self.op.remote_node)
8584
    return nl, nl
8585

    
8586
  def CheckPrereq(self):
8587
    """Check prerequisites.
8588

8589
    """
8590
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8591
            self.op.iallocator is None)
8592

    
8593
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8594
    if owned_groups:
8595
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8596
      if owned_groups != groups:
8597
        raise errors.OpExecError("Node groups used by instance '%s' changed"
8598
                                 " since lock was acquired, current list is %r,"
8599
                                 " used to be '%s'" %
8600
                                 (self.op.instance_name,
8601
                                  utils.CommaJoin(groups),
8602
                                  utils.CommaJoin(owned_groups)))
8603

    
8604
    return LogicalUnit.CheckPrereq(self)
8605

    
8606

    
8607
class TLReplaceDisks(Tasklet):
8608
  """Replaces disks for an instance.
8609

8610
  Note: Locking is not within the scope of this class.
8611

8612
  """
8613
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8614
               disks, delay_iallocator, early_release):
8615
    """Initializes this class.
8616

8617
    """
8618
    Tasklet.__init__(self, lu)
8619

    
8620
    # Parameters
8621
    self.instance_name = instance_name
8622
    self.mode = mode
8623
    self.iallocator_name = iallocator_name
8624
    self.remote_node = remote_node
8625
    self.disks = disks
8626
    self.delay_iallocator = delay_iallocator
8627
    self.early_release = early_release
8628

    
8629
    # Runtime data
8630
    self.instance = None
8631
    self.new_node = None
8632
    self.target_node = None
8633
    self.other_node = None
8634
    self.remote_node_info = None
8635
    self.node_secondary_ip = None
8636

    
8637
  @staticmethod
8638
  def CheckArguments(mode, remote_node, iallocator):
8639
    """Helper function for users of this class.
8640

8641
    """
8642
    # check for valid parameter combination
8643
    if mode == constants.REPLACE_DISK_CHG:
8644
      if remote_node is None and iallocator is None:
8645
        raise errors.OpPrereqError("When changing the secondary either an"
8646
                                   " iallocator script must be used or the"
8647
                                   " new node given", errors.ECODE_INVAL)
8648

    
8649
      if remote_node is not None and iallocator is not None:
8650
        raise errors.OpPrereqError("Give either the iallocator or the new"
8651
                                   " secondary, not both", errors.ECODE_INVAL)
8652

    
8653
    elif remote_node is not None or iallocator is not None:
8654
      # Not replacing the secondary
8655
      raise errors.OpPrereqError("The iallocator and new node options can"
8656
                                 " only be used when changing the"
8657
                                 " secondary node", errors.ECODE_INVAL)
8658
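    # Informational summary of the combinations accepted above:
    #
    #   mode               remote_node  iallocator  outcome
    #   ----------------   -----------  ----------  --------------------
    #   REPLACE_DISK_CHG   None         None        rejected (need one)
    #   REPLACE_DISK_CHG   given        given       rejected (not both)
    #   REPLACE_DISK_CHG   exactly one given        accepted
    #   other modes        None         None        accepted
    #   other modes        either one given         rejected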

    
8659
  @staticmethod
8660
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8661
    """Compute a new secondary node using an IAllocator.
8662

8663
    """
8664
    ial = IAllocator(lu.cfg, lu.rpc,
8665
                     mode=constants.IALLOCATOR_MODE_RELOC,
8666
                     name=instance_name,
8667
                     relocate_from=relocate_from)
8668

    
8669
    ial.Run(iallocator_name)
8670

    
8671
    if not ial.success:
8672
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8673
                                 " %s" % (iallocator_name, ial.info),
8674
                                 errors.ECODE_NORES)
8675

    
8676
    if len(ial.result) != ial.required_nodes:
8677
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8678
                                 " of nodes (%s), required %s" %
8679
                                 (iallocator_name,
8680
                                  len(ial.result), ial.required_nodes),
8681
                                 errors.ECODE_FAULT)
8682

    
8683
    remote_node_name = ial.result[0]
8684

    
8685
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8686
               instance_name, remote_node_name)
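    # Example of the expected shape (node name is made up): a successful
    # RELOC run for "inst1.example.com" might yield
    # ial.result == ["node3.example.com"], which is then returned and used
    # as the new secondary.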
8687

    
8688
    return remote_node_name
8689

    
8690
  def _FindFaultyDisks(self, node_name):
8691
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8692
                                    node_name, True)
8693

    
8694
  def _CheckDisksActivated(self, instance):
8695
    """Checks if the instance disks are activated.
8696

8697
    @param instance: The instance to check disks
8698
    @return: True if they are activated, False otherwise
8699

8700
    """
8701
    nodes = instance.all_nodes
8702

    
8703
    for idx, dev in enumerate(instance.disks):
8704
      for node in nodes:
8705
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8706
        self.cfg.SetDiskID(dev, node)
8707

    
8708
        result = self.rpc.call_blockdev_find(node, dev)
8709

    
8710
        if result.offline:
8711
          continue
8712
        elif result.fail_msg or not result.payload:
8713
          return False
8714

    
8715
    return True
8716

    
8717
  def CheckPrereq(self):
8718
    """Check prerequisites.
8719

8720
    This checks that the instance is in the cluster.
8721

8722
    """
8723
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8724
    assert instance is not None, \
8725
      "Cannot retrieve locked instance %s" % self.instance_name
8726

    
8727
    if instance.disk_template != constants.DT_DRBD8:
8728
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8729
                                 " instances", errors.ECODE_INVAL)
8730

    
8731
    if len(instance.secondary_nodes) != 1:
8732
      raise errors.OpPrereqError("The instance has a strange layout,"
8733
                                 " expected one secondary but found %d" %
8734
                                 len(instance.secondary_nodes),
8735
                                 errors.ECODE_FAULT)
8736

    
8737
    if not self.delay_iallocator:
8738
      self._CheckPrereq2()
8739

    
8740
  def _CheckPrereq2(self):
8741
    """Check prerequisites, second part.
8742

8743
    This function should always be part of CheckPrereq. It was separated and is
8744
    now called from Exec because during node evacuation iallocator was only
8745
    called with an unmodified cluster model, not taking planned changes into
8746
    account.
8747

8748
    """
8749
    instance = self.instance
8750
    secondary_node = instance.secondary_nodes[0]
8751

    
8752
    if self.iallocator_name is None:
8753
      remote_node = self.remote_node
8754
    else:
8755
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8756
                                       instance.name, instance.secondary_nodes)
8757

    
8758
    if remote_node is None:
8759
      self.remote_node_info = None
8760
    else:
8761
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
8762
             "Remote node '%s' is not locked" % remote_node
8763

    
8764
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8765
      assert self.remote_node_info is not None, \
8766
        "Cannot retrieve locked node %s" % remote_node
8767

    
8768
    if remote_node == self.instance.primary_node:
8769
      raise errors.OpPrereqError("The specified node is the primary node of"
8770
                                 " the instance", errors.ECODE_INVAL)
8771

    
8772
    if remote_node == secondary_node:
8773
      raise errors.OpPrereqError("The specified node is already the"
8774
                                 " secondary node of the instance",
8775
                                 errors.ECODE_INVAL)
8776

    
8777
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8778
                                    constants.REPLACE_DISK_CHG):
8779
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8780
                                 errors.ECODE_INVAL)
8781

    
8782
    if self.mode == constants.REPLACE_DISK_AUTO:
8783
      if not self._CheckDisksActivated(instance):
8784
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
8785
                                   " first" % self.instance_name,
8786
                                   errors.ECODE_STATE)
8787
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8788
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8789

    
8790
      if faulty_primary and faulty_secondary:
8791
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8792
                                   " one node and can not be repaired"
8793
                                   " automatically" % self.instance_name,
8794
                                   errors.ECODE_STATE)
8795

    
8796
      if faulty_primary:
8797
        self.disks = faulty_primary
8798
        self.target_node = instance.primary_node
8799
        self.other_node = secondary_node
8800
        check_nodes = [self.target_node, self.other_node]
8801
      elif faulty_secondary:
8802
        self.disks = faulty_secondary
8803
        self.target_node = secondary_node
8804
        self.other_node = instance.primary_node
8805
        check_nodes = [self.target_node, self.other_node]
8806
      else:
8807
        self.disks = []
8808
        check_nodes = []
8809

    
8810
    else:
8811
      # Non-automatic modes
8812
      if self.mode == constants.REPLACE_DISK_PRI:
8813
        self.target_node = instance.primary_node
8814
        self.other_node = secondary_node
8815
        check_nodes = [self.target_node, self.other_node]
8816

    
8817
      elif self.mode == constants.REPLACE_DISK_SEC:
8818
        self.target_node = secondary_node
8819
        self.other_node = instance.primary_node
8820
        check_nodes = [self.target_node, self.other_node]
8821

    
8822
      elif self.mode == constants.REPLACE_DISK_CHG:
8823
        self.new_node = remote_node
8824
        self.other_node = instance.primary_node
8825
        self.target_node = secondary_node
8826
        check_nodes = [self.new_node, self.other_node]
8827

    
8828
        _CheckNodeNotDrained(self.lu, remote_node)
8829
        _CheckNodeVmCapable(self.lu, remote_node)
8830

    
8831
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8832
        assert old_node_info is not None
8833
        if old_node_info.offline and not self.early_release:
8834
          # doesn't make sense to delay the release
8835
          self.early_release = True
8836
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8837
                          " early-release mode", secondary_node)
8838

    
8839
      else:
8840
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8841
                                     self.mode)
8842

    
8843
      # If not specified all disks should be replaced
8844
      if not self.disks:
8845
        self.disks = range(len(self.instance.disks))
8846

    
8847
    for node in check_nodes:
8848
      _CheckNodeOnline(self.lu, node)
8849

    
8850
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
8851
                                                          self.other_node,
8852
                                                          self.target_node]
8853
                              if node_name is not None)
8854

    
8855
    # Release unneeded node locks
8856
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
8857

    
8858
    # Release any owned node group
8859
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
8860
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
8861

    
8862
    # Check whether disks are valid
8863
    for disk_idx in self.disks:
8864
      instance.FindDisk(disk_idx)
8865

    
8866
    # Get secondary node IP addresses
8867
    self.node_secondary_ip = \
8868
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
8869
           for node_name in touched_nodes)
8870

    
8871
  def Exec(self, feedback_fn):
8872
    """Execute disk replacement.
8873

8874
    This dispatches the disk replacement to the appropriate handler.
8875

8876
    """
8877
    if self.delay_iallocator:
8878
      self._CheckPrereq2()
8879

    
8880
    if __debug__:
8881
      # Verify owned locks before starting operation
8882
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
8883
      assert set(owned_locks) == set(self.node_secondary_ip), \
8884
          ("Incorrect node locks, owning %s, expected %s" %
8885
           (owned_locks, self.node_secondary_ip.keys()))
8886

    
8887
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
8888
      assert list(owned_locks) == [self.instance_name], \
8889
          "Instance '%s' not locked" % self.instance_name
8890

    
8891
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
8892
          "Should not own any node group lock at this point"
8893

    
8894
    if not self.disks:
8895
      feedback_fn("No disks need replacement")
8896
      return
8897

    
8898
    feedback_fn("Replacing disk(s) %s for %s" %
8899
                (utils.CommaJoin(self.disks), self.instance.name))
8900

    
8901
    activate_disks = (not self.instance.admin_up)
8902

    
8903
    # Activate the instance disks if we're replacing them on a down instance
8904
    if activate_disks:
8905
      _StartInstanceDisks(self.lu, self.instance, True)
8906

    
8907
    try:
8908
      # Should we replace the secondary node?
8909
      if self.new_node is not None:
8910
        fn = self._ExecDrbd8Secondary
8911
      else:
8912
        fn = self._ExecDrbd8DiskOnly
8913

    
8914
      result = fn(feedback_fn)
8915
    finally:
8916
      # Deactivate the instance disks if we're replacing them on a
8917
      # down instance
8918
      if activate_disks:
8919
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8920

    
8921
    if __debug__:
8922
      # Verify owned locks
8923
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
8924
      nodes = frozenset(self.node_secondary_ip)
8925
      assert ((self.early_release and not owned_locks) or
8926
              (not self.early_release and not (set(owned_locks) - nodes))), \
8927
        ("Not owning the correct locks, early_release=%s, owned=%r,"
8928
         " nodes=%r" % (self.early_release, owned_locks, nodes))
8929

    
8930
    return result
8931

    
8932
  def _CheckVolumeGroup(self, nodes):
8933
    self.lu.LogInfo("Checking volume groups")
8934

    
8935
    vgname = self.cfg.GetVGName()
8936

    
8937
    # Make sure volume group exists on all involved nodes
8938
    results = self.rpc.call_vg_list(nodes)
8939
    if not results:
8940
      raise errors.OpExecError("Can't list volume groups on the nodes")
8941

    
8942
    for node in nodes:
8943
      res = results[node]
8944
      res.Raise("Error checking node %s" % node)
8945
      if vgname not in res.payload:
8946
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8947
                                 (vgname, node))
8948

    
8949
  def _CheckDisksExistence(self, nodes):
8950
    # Check disk existence
8951
    for idx, dev in enumerate(self.instance.disks):
8952
      if idx not in self.disks:
8953
        continue
8954

    
8955
      for node in nodes:
8956
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8957
        self.cfg.SetDiskID(dev, node)
8958

    
8959
        result = self.rpc.call_blockdev_find(node, dev)
8960

    
8961
        msg = result.fail_msg
8962
        if msg or not result.payload:
8963
          if not msg:
8964
            msg = "disk not found"
8965
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8966
                                   (idx, node, msg))
8967

    
8968
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8969
    for idx, dev in enumerate(self.instance.disks):
8970
      if idx not in self.disks:
8971
        continue
8972

    
8973
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8974
                      (idx, node_name))
8975

    
8976
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8977
                                   ldisk=ldisk):
8978
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8979
                                 " replace disks for instance %s" %
8980
                                 (node_name, self.instance.name))
8981

    
8982
  def _CreateNewStorage(self, node_name):
8983
    iv_names = {}
8984

    
8985
    for idx, dev in enumerate(self.instance.disks):
8986
      if idx not in self.disks:
8987
        continue
8988

    
8989
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8990

    
8991
      self.cfg.SetDiskID(dev, node_name)
8992

    
8993
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8994
      names = _GenerateUniqueNames(self.lu, lv_names)
8995

    
8996
      vg_data = dev.children[0].logical_id[0]
8997
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8998
                             logical_id=(vg_data, names[0]))
8999
      vg_meta = dev.children[1].logical_id[0]
9000
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9001
                             logical_id=(vg_meta, names[1]))
9002

    
9003
      new_lvs = [lv_data, lv_meta]
9004
      old_lvs = dev.children
9005
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9006

    
9007
      # we pass force_create=True to force the LVM creation
9008
      for new_lv in new_lvs:
9009
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9010
                        _GetInstanceInfoText(self.instance), False)
9011

    
9012
    return iv_names
9013

    
9014
  def _CheckDevices(self, node_name, iv_names):
9015
    for name, (dev, _, _) in iv_names.iteritems():
9016
      self.cfg.SetDiskID(dev, node_name)
9017

    
9018
      result = self.rpc.call_blockdev_find(node_name, dev)
9019

    
9020
      msg = result.fail_msg
9021
      if msg or not result.payload:
9022
        if not msg:
9023
          msg = "disk not found"
9024
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9025
                                 (name, msg))
9026

    
9027
      if result.payload.is_degraded:
9028
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9029

    
9030
  def _RemoveOldStorage(self, node_name, iv_names):
9031
    for name, (_, old_lvs, _) in iv_names.iteritems():
9032
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9033

    
9034
      for lv in old_lvs:
9035
        self.cfg.SetDiskID(lv, node_name)
9036

    
9037
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9038
        if msg:
9039
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9040
                             hint="remove unused LVs manually")
9041

    
9042
  def _ExecDrbd8DiskOnly(self, feedback_fn):
9043
    """Replace a disk on the primary or secondary for DRBD 8.
9044

9045
    The algorithm for replace is quite complicated:
9046

9047
      1. for each disk to be replaced:
9048

9049
        1. create new LVs on the target node with unique names
9050
        1. detach old LVs from the drbd device
9051
        1. rename old LVs to name_replaced.<time_t>
9052
        1. rename new LVs to old LVs
9053
        1. attach the new LVs (with the old names now) to the drbd device
9054

9055
      1. wait for sync across all devices
9056

9057
      1. for each modified disk:
9058

9059
        1. remove old LVs (which have the name name_replaced.<time_t>)
9060

9061
    Failures are not very well handled.
9062

9063
    """
9064
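    # Rough sketch of the per-disk LV swap performed below (VG and LV names
    # are illustrative only):
    #
    #   old data LV  xenvg/<uuid>.disk0_data     -> renamed to *_replaced-<ts>
    #   new data LV  xenvg/<new-uuid>.disk0_data -> renamed to the old name
    #
    # so the DRBD device ends up attached to the freshly created LVs while
    # keeping its original logical id; the *_replaced-<ts> LVs are removed at
    # the end (or earlier when self.early_release is set).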
    steps_total = 6
9065

    
9066
    # Step: check device activation
9067
    self.lu.LogStep(1, steps_total, "Check device existence")
9068
    self._CheckDisksExistence([self.other_node, self.target_node])
9069
    self._CheckVolumeGroup([self.target_node, self.other_node])
9070

    
9071
    # Step: check other node consistency
9072
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9073
    self._CheckDisksConsistency(self.other_node,
9074
                                self.other_node == self.instance.primary_node,
9075
                                False)
9076

    
9077
    # Step: create new storage
9078
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9079
    iv_names = self._CreateNewStorage(self.target_node)
9080

    
9081
    # Step: for each lv, detach+rename*2+attach
9082
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9083
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9084
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9085

    
9086
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9087
                                                     old_lvs)
9088
      result.Raise("Can't detach drbd from local storage on node"
9089
                   " %s for device %s" % (self.target_node, dev.iv_name))
9090
      #dev.children = []
9091
      #cfg.Update(instance)
9092

    
9093
      # ok, we created the new LVs, so now we know we have the needed
9094
      # storage; as such, we proceed on the target node to rename
9095
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9096
      # using the assumption that logical_id == physical_id (which in
9097
      # turn is the unique_id on that node)
9098

    
9099
      # FIXME(iustin): use a better name for the replaced LVs
9100
      temp_suffix = int(time.time())
9101
      ren_fn = lambda d, suff: (d.physical_id[0],
9102
                                d.physical_id[1] + "_replaced-%s" % suff)
9103

    
9104
      # Build the rename list based on what LVs exist on the node
9105
      rename_old_to_new = []
9106
      for to_ren in old_lvs:
9107
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9108
        if not result.fail_msg and result.payload:
9109
          # device exists
9110
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9111

    
9112
      self.lu.LogInfo("Renaming the old LVs on the target node")
9113
      result = self.rpc.call_blockdev_rename(self.target_node,
9114
                                             rename_old_to_new)
9115
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9116

    
9117
      # Now we rename the new LVs to the old LVs
9118
      self.lu.LogInfo("Renaming the new LVs on the target node")
9119
      rename_new_to_old = [(new, old.physical_id)
9120
                           for old, new in zip(old_lvs, new_lvs)]
9121
      result = self.rpc.call_blockdev_rename(self.target_node,
9122
                                             rename_new_to_old)
9123
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9124

    
9125
      for old, new in zip(old_lvs, new_lvs):
9126
        new.logical_id = old.logical_id
9127
        self.cfg.SetDiskID(new, self.target_node)
9128

    
9129
      for disk in old_lvs:
9130
        disk.logical_id = ren_fn(disk, temp_suffix)
9131
        self.cfg.SetDiskID(disk, self.target_node)
9132

    
9133
      # Now that the new lvs have the old name, we can add them to the device
9134
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9135
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9136
                                                  new_lvs)
9137
      msg = result.fail_msg
9138
      if msg:
9139
        for new_lv in new_lvs:
9140
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9141
                                               new_lv).fail_msg
9142
          if msg2:
9143
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
9146
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9147

    
9148
      dev.children = new_lvs
9149

    
9150
      self.cfg.Update(self.instance, feedback_fn)
9151

    
9152
    cstep = 5
9153
    if self.early_release:
9154
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9155
      cstep += 1
9156
      self._RemoveOldStorage(self.target_node, iv_names)
9157
      # WARNING: we release both node locks here, do not do other RPCs
9158
      # than WaitForSync to the primary node
9159
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9160
                    names=[self.target_node, self.other_node])
9161

    
9162
    # Wait for sync
9163
    # This can fail as the old devices are degraded and _WaitForSync
9164
    # does a combined result over all disks, so we don't check its return value
9165
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9166
    cstep += 1
9167
    _WaitForSync(self.lu, self.instance)
9168

    
9169
    # Check all devices manually
9170
    self._CheckDevices(self.instance.primary_node, iv_names)
9171

    
9172
    # Step: remove old storage
9173
    if not self.early_release:
9174
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9175
      cstep += 1
9176
      self._RemoveOldStorage(self.target_node, iv_names)
9177

    
9178
  def _ExecDrbd8Secondary(self, feedback_fn):
9179
    """Replace the secondary node for DRBD 8.
9180

9181
    The algorithm for replace is quite complicated:
9182
      - for all disks of the instance:
9183
        - create new LVs on the new node with same names
9184
        - shutdown the drbd device on the old secondary
9185
        - disconnect the drbd network on the primary
9186
        - create the drbd device on the new secondary
9187
        - network attach the drbd on the primary, using an artifice:
9188
          the drbd code for Attach() will connect to the network if it
9189
          finds a device which is connected to the good local disks but
9190
          not network enabled
9191
      - wait for sync across all devices
9192
      - remove all disks from the old secondary
9193

9194
    Failures are not very well handled.
9195

9196
    """
9197
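    # Outline of the logical_id juggling done below: each new DRBD is first
    # created on the new node with a logical_id that lacks the network port
    # ("new_alone_id", so it comes up standalone), the primary is then
    # disconnected from the old secondary, and finally both sides are
    # re-attached using the full "new_net_id" (which does include the port),
    # at which point DRBD starts syncing to the new secondary.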
    steps_total = 6
9198

    
9199
    # Step: check device activation
9200
    self.lu.LogStep(1, steps_total, "Check device existence")
9201
    self._CheckDisksExistence([self.instance.primary_node])
9202
    self._CheckVolumeGroup([self.instance.primary_node])
9203

    
9204
    # Step: check other node consistency
9205
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9206
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9207

    
9208
    # Step: create new storage
9209
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9210
    for idx, dev in enumerate(self.instance.disks):
9211
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9212
                      (self.new_node, idx))
9213
      # we pass force_create=True to force LVM creation
9214
      for new_lv in dev.children:
9215
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9216
                        _GetInstanceInfoText(self.instance), False)
9217

    
9218
    # Step 4: drbd minors and drbd setup changes
9219
    # after this, we must manually remove the drbd minors on both the
9220
    # error and the success paths
9221
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9222
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9223
                                         for dev in self.instance.disks],
9224
                                        self.instance.name)
9225
    logging.debug("Allocated minors %r", minors)
9226

    
9227
    iv_names = {}
9228
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9229
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9230
                      (self.new_node, idx))
9231
      # create new devices on new_node; note that we create two IDs:
9232
      # one without port, so the drbd will be activated without
9233
      # networking information on the new node at this stage, and one
9234
      # with network, for the latter activation in step 4
9235
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9236
      if self.instance.primary_node == o_node1:
9237
        p_minor = o_minor1
9238
      else:
9239
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9240
        p_minor = o_minor2
9241

    
9242
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9243
                      p_minor, new_minor, o_secret)
9244
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9245
                    p_minor, new_minor, o_secret)
9246

    
9247
      iv_names[idx] = (dev, dev.children, new_net_id)
9248
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9249
                    new_net_id)
9250
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9251
                              logical_id=new_alone_id,
9252
                              children=dev.children,
9253
                              size=dev.size)
9254
      try:
9255
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9256
                              _GetInstanceInfoText(self.instance), False)
9257
      except errors.GenericError:
9258
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9259
        raise
9260

    
9261
    # We have new devices, shutdown the drbd on the old secondary
9262
    for idx, dev in enumerate(self.instance.disks):
9263
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9264
      self.cfg.SetDiskID(dev, self.target_node)
9265
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9266
      if msg:
9267
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
9269
                           hint=("Please cleanup this device manually as"
9270
                                 " soon as possible"))
9271

    
9272
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9273
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9274
                                               self.node_secondary_ip,
9275
                                               self.instance.disks)\
9276
                                              [self.instance.primary_node]
9277

    
9278
    msg = result.fail_msg
9279
    if msg:
9280
      # detaches didn't succeed (unlikely)
9281
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9282
      raise errors.OpExecError("Can't detach the disks from the network on"
9283
                               " old node: %s" % (msg,))
9284

    
9285
    # if we managed to detach at least one, we update all the disks of
9286
    # the instance to point to the new secondary
9287
    self.lu.LogInfo("Updating instance configuration")
9288
    for dev, _, new_logical_id in iv_names.itervalues():
9289
      dev.logical_id = new_logical_id
9290
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9291

    
9292
    self.cfg.Update(self.instance, feedback_fn)
9293

    
9294
    # and now perform the drbd attach
9295
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9296
                    " (standalone => connected)")
9297
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9298
                                            self.new_node],
9299
                                           self.node_secondary_ip,
9300
                                           self.instance.disks,
9301
                                           self.instance.name,
9302
                                           False)
9303
    for to_node, to_result in result.items():
9304
      msg = to_result.fail_msg
9305
      if msg:
9306
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9307
                           to_node, msg,
9308
                           hint=("please do a gnt-instance info to see the"
9309
                                 " status of disks"))
9310
    cstep = 5
9311
    if self.early_release:
9312
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9313
      cstep += 1
9314
      self._RemoveOldStorage(self.target_node, iv_names)
9315
      # WARNING: we release all node locks here, do not do other RPCs
9316
      # than WaitForSync to the primary node
9317
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9318
                    names=[self.instance.primary_node,
9319
                           self.target_node,
9320
                           self.new_node])
9321

    
9322
    # Wait for sync
9323
    # This can fail as the old devices are degraded and _WaitForSync
9324
    # does a combined result over all disks, so we don't check its return value
9325
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9326
    cstep += 1
9327
    _WaitForSync(self.lu, self.instance)
9328

    
9329
    # Check all devices manually
9330
    self._CheckDevices(self.instance.primary_node, iv_names)
9331

    
9332
    # Step: remove old storage
9333
    if not self.early_release:
9334
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9335
      self._RemoveOldStorage(self.target_node, iv_names)
9336

    
9337

    
9338
class LURepairNodeStorage(NoHooksLU):
9339
  """Repairs the volume group on a node.
9340

9341
  """
9342
  REQ_BGL = False
9343

    
9344
  def CheckArguments(self):
9345
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9346

    
9347
    storage_type = self.op.storage_type
9348

    
9349
    if (constants.SO_FIX_CONSISTENCY not in
9350
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9351
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9352
                                 " repaired" % storage_type,
9353
                                 errors.ECODE_INVAL)
9354

    
9355
  def ExpandNames(self):
9356
    self.needed_locks = {
9357
      locking.LEVEL_NODE: [self.op.node_name],
9358
      }
9359

    
9360
  def _CheckFaultyDisks(self, instance, node_name):
9361
    """Ensure faulty disks abort the opcode or at least warn."""
9362
    try:
9363
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9364
                                  node_name, True):
9365
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9366
                                   " node '%s'" % (instance.name, node_name),
9367
                                   errors.ECODE_STATE)
9368
    except errors.OpPrereqError, err:
9369
      if self.op.ignore_consistency:
9370
        self.proc.LogWarning(str(err.args[0]))
9371
      else:
9372
        raise
9373

    
9374
  def CheckPrereq(self):
9375
    """Check prerequisites.
9376

9377
    """
9378
    # Check whether any instance on this node has faulty disks
9379
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9380
      if not inst.admin_up:
9381
        continue
9382
      check_nodes = set(inst.all_nodes)
9383
      check_nodes.discard(self.op.node_name)
9384
      for inst_node_name in check_nodes:
9385
        self._CheckFaultyDisks(inst, inst_node_name)
9386

    
9387
  def Exec(self, feedback_fn):
9388
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9389
                (self.op.name, self.op.node_name))
9390

    
9391
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9392
    result = self.rpc.call_storage_execute(self.op.node_name,
9393
                                           self.op.storage_type, st_args,
9394
                                           self.op.name,
9395
                                           constants.SO_FIX_CONSISTENCY)
9396
    result.Raise("Failed to repair storage unit '%s' on %s" %
9397
                 (self.op.name, self.op.node_name))
9398

    
9399

    
9400
class LUNodeEvacStrategy(NoHooksLU):
9401
  """Computes the node evacuation strategy.
9402

9403
  """
9404
  REQ_BGL = False
9405

    
9406
  def CheckArguments(self):
9407
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9408

    
9409
  def ExpandNames(self):
9410
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9411
    self.needed_locks = locks = {}
9412
    if self.op.remote_node is None:
9413
      locks[locking.LEVEL_NODE] = locking.ALL_SET
9414
    else:
9415
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9416
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9417

    
9418
  def Exec(self, feedback_fn):
9419
    if self.op.remote_node is not None:
9420
      instances = []
9421
      for node in self.op.nodes:
9422
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9423
      result = []
9424
      for i in instances:
9425
        if i.primary_node == self.op.remote_node:
9426
          raise errors.OpPrereqError("Node %s is the primary node of"
9427
                                     " instance %s, cannot use it as"
9428
                                     " secondary" %
9429
                                     (self.op.remote_node, i.name),
9430
                                     errors.ECODE_INVAL)
9431
        result.append([i.name, self.op.remote_node])
9432
    else:
9433
      ial = IAllocator(self.cfg, self.rpc,
9434
                       mode=constants.IALLOCATOR_MODE_MEVAC,
9435
                       evac_nodes=self.op.nodes)
9436
      ial.Run(self.op.iallocator, validate=True)
9437
      if not ial.success:
9438
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9439
                                 errors.ECODE_NORES)
9440
      result = ial.result
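    # The value returned below is a list of [instance_name, target_node]
    # pairs when an explicit remote node was given; with an iallocator it is
    # the raw result produced by the allocator script for the MEVAC request.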
9441
    return result
9442

    
9443

    
9444
class LUInstanceGrowDisk(LogicalUnit):
9445
  """Grow a disk of an instance.
9446

9447
  """
9448
  HPATH = "disk-grow"
9449
  HTYPE = constants.HTYPE_INSTANCE
9450
  REQ_BGL = False
9451

    
9452
  def ExpandNames(self):
9453
    self._ExpandAndLockInstance()
9454
    self.needed_locks[locking.LEVEL_NODE] = []
9455
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9456

    
9457
  def DeclareLocks(self, level):
9458
    if level == locking.LEVEL_NODE:
9459
      self._LockInstancesNodes()
9460

    
9461
  def BuildHooksEnv(self):
9462
    """Build hooks env.
9463

9464
    This runs on the master, the primary and all the secondaries.
9465

9466
    """
9467
    env = {
9468
      "DISK": self.op.disk,
9469
      "AMOUNT": self.op.amount,
9470
      }
9471
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9472
    return env
9473

    
9474
  def BuildHooksNodes(self):
9475
    """Build hooks nodes.
9476

9477
    """
9478
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9479
    return (nl, nl)
9480

    
9481
  def CheckPrereq(self):
9482
    """Check prerequisites.
9483

9484
    This checks that the instance is in the cluster.
9485

9486
    """
9487
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9488
    assert instance is not None, \
9489
      "Cannot retrieve locked instance %s" % self.op.instance_name
9490
    nodenames = list(instance.all_nodes)
9491
    for node in nodenames:
9492
      _CheckNodeOnline(self, node)
9493

    
9494
    self.instance = instance
9495

    
9496
    if instance.disk_template not in constants.DTS_GROWABLE:
9497
      raise errors.OpPrereqError("Instance's disk layout does not support"
9498
                                 " growing", errors.ECODE_INVAL)
9499

    
9500
    self.disk = instance.FindDisk(self.op.disk)
9501

    
9502
    if instance.disk_template not in (constants.DT_FILE,
9503
                                      constants.DT_SHARED_FILE):
9504
      # TODO: check the free disk space for file, when that feature will be
9505
      # supported
9506
      _CheckNodesFreeDiskPerVG(self, nodenames,
9507
                               self.disk.ComputeGrowth(self.op.amount))
9508

    
9509
  def Exec(self, feedback_fn):
9510
    """Execute disk grow.
9511

9512
    """
9513
    instance = self.instance
9514
    disk = self.disk
9515

    
9516
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9517
    if not disks_ok:
9518
      raise errors.OpExecError("Cannot activate block device to grow")
9519

    
9520
    for node in instance.all_nodes:
9521
      self.cfg.SetDiskID(disk, node)
9522
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
9523
      result.Raise("Grow request failed to node %s" % node)
9524

    
9525
      # TODO: Rewrite code to work properly
9526
      # DRBD goes into sync mode for a short amount of time after executing the
9527
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9528
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9529
      # time is a work-around.
9530
      time.sleep(5)
9531

    
9532
    disk.RecordGrow(self.op.amount)
9533
    self.cfg.Update(instance, feedback_fn)
9534
    if self.op.wait_for_sync:
9535
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9536
      if disk_abort:
9537
        self.proc.LogWarning("Disk sync-ing has not returned a good"
9538
                             " status; please check the instance")
9539
      if not instance.admin_up:
9540
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9541
    elif not instance.admin_up:
9542
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9543
                           " not supposed to be running because no wait for"
9544
                           " sync mode was requested")
9545

    
9546

    
9547
class LUInstanceQueryData(NoHooksLU):
9548
  """Query runtime instance data.
9549

9550
  """
9551
  REQ_BGL = False
9552

    
9553
  def ExpandNames(self):
9554
    self.needed_locks = {}
9555

    
9556
    # Use locking if requested or when non-static information is wanted
9557
    if not (self.op.static or self.op.use_locking):
9558
      self.LogWarning("Non-static data requested, locks need to be acquired")
9559
      self.op.use_locking = True
9560

    
9561
    if self.op.instances or not self.op.use_locking:
9562
      # Expand instance names right here
9563
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
9564
    else:
9565
      # Will use acquired locks
9566
      self.wanted_names = None
9567

    
9568
    if self.op.use_locking:
9569
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9570

    
9571
      if self.wanted_names is None:
9572
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9573
      else:
9574
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9575

    
9576
      self.needed_locks[locking.LEVEL_NODE] = []
9577
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9578
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9579

    
9580
  def DeclareLocks(self, level):
9581
    if self.op.use_locking and level == locking.LEVEL_NODE:
9582
      self._LockInstancesNodes()
9583

    
9584
  def CheckPrereq(self):
9585
    """Check prerequisites.
9586

9587
    This only checks the optional instance list against the existing names.
9588

9589
    """
9590
    if self.wanted_names is None:
9591
      assert self.op.use_locking, "Locking was not used"
9592
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9593

    
9594
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9595
                             for name in self.wanted_names]
9596

    
9597
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9598
    """Returns the status of a block device
9599

9600
    """
9601
    if self.op.static or not node:
9602
      return None
9603

    
9604
    self.cfg.SetDiskID(dev, node)
9605

    
9606
    result = self.rpc.call_blockdev_find(node, dev)
9607
    if result.offline:
9608
      return None
9609

    
9610
    result.Raise("Can't compute disk status for %s" % instance_name)
9611

    
9612
    status = result.payload
9613
    if status is None:
9614
      return None
9615

    
9616
    return (status.dev_path, status.major, status.minor,
9617
            status.sync_percent, status.estimated_time,
9618
            status.is_degraded, status.ldisk_status)
9619

    
9620
  def _ComputeDiskStatus(self, instance, snode, dev):
9621
    """Compute block device status.
9622

9623
    """
9624
    if dev.dev_type in constants.LDS_DRBD:
9625
      # we change the snode then (otherwise we use the one passed in)
9626
      if dev.logical_id[0] == instance.primary_node:
9627
        snode = dev.logical_id[1]
9628
      else:
9629
        snode = dev.logical_id[0]
9630

    
9631
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9632
                                              instance.name, dev)
9633
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9634

    
9635
    if dev.children:
9636
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9637
                      for child in dev.children]
9638
    else:
9639
      dev_children = []
9640

    
9641
    return {
9642
      "iv_name": dev.iv_name,
9643
      "dev_type": dev.dev_type,
9644
      "logical_id": dev.logical_id,
9645
      "physical_id": dev.physical_id,
9646
      "pstatus": dev_pstatus,
9647
      "sstatus": dev_sstatus,
9648
      "children": dev_children,
9649
      "mode": dev.mode,
9650
      "size": dev.size,
9651
      }
9652

    
9653
  def Exec(self, feedback_fn):
9654
    """Gather and return data"""
9655
    result = {}
9656

    
9657
    cluster = self.cfg.GetClusterInfo()
9658

    
9659
    for instance in self.wanted_instances:
9660
      if not self.op.static:
9661
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9662
                                                  instance.name,
9663
                                                  instance.hypervisor)
9664
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9665
        remote_info = remote_info.payload
9666
        if remote_info and "state" in remote_info:
9667
          remote_state = "up"
9668
        else:
9669
          remote_state = "down"
9670
      else:
9671
        remote_state = None
9672
      if instance.admin_up:
9673
        config_state = "up"
9674
      else:
9675
        config_state = "down"
9676

    
9677
      disks = [self._ComputeDiskStatus(instance, None, device)
9678
               for device in instance.disks]
9679

    
9680
      result[instance.name] = {
9681
        "name": instance.name,
9682
        "config_state": config_state,
9683
        "run_state": remote_state,
9684
        "pnode": instance.primary_node,
9685
        "snodes": instance.secondary_nodes,
9686
        "os": instance.os,
9687
        # this happens to be the same format used for hooks
9688
        "nics": _NICListToTuple(self, instance.nics),
9689
        "disk_template": instance.disk_template,
9690
        "disks": disks,
9691
        "hypervisor": instance.hypervisor,
9692
        "network_port": instance.network_port,
9693
        "hv_instance": instance.hvparams,
9694
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9695
        "be_instance": instance.beparams,
9696
        "be_actual": cluster.FillBE(instance),
9697
        "os_instance": instance.osparams,
9698
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9699
        "serial_no": instance.serial_no,
9700
        "mtime": instance.mtime,
9701
        "ctime": instance.ctime,
9702
        "uuid": instance.uuid,
9703
        }
9704

    
9705
    return result
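

# Illustrative sketch: consuming the dictionary returned by
# LUInstanceQueryData.Exec above. Only keys actually built in Exec
# ("config_state", "run_state", "pnode") are read; the helper itself is
# hypothetical and not part of the original module.
def _ExampleSummarizeInstanceData(query_result):
  """Maps instance names to a short state summary (hedged example).

  @type query_result: dict
  @param query_result: per-instance data as returned by LUInstanceQueryData

  """
  # e.g. {"inst1": "up/up on node1", ...}
  return dict((name, "%s/%s on %s" % (data["config_state"],
                                      data["run_state"],
                                      data["pnode"]))
              for (name, data) in query_result.items())
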
9706

    
9707

    
9708
class LUInstanceSetParams(LogicalUnit):
9709
  """Modifies an instances's parameters.
9710

9711
  """
9712
  HPATH = "instance-modify"
9713
  HTYPE = constants.HTYPE_INSTANCE
9714
  REQ_BGL = False
9715

    
9716
  def CheckArguments(self):
9717
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9718
            self.op.hvparams or self.op.beparams or self.op.os_name):
9719
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9720

    
9721
    if self.op.hvparams:
9722
      _CheckGlobalHvParams(self.op.hvparams)
9723

    
9724
    # Disk validation
9725
    disk_addremove = 0
9726
    for disk_op, disk_dict in self.op.disks:
9727
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9728
      if disk_op == constants.DDM_REMOVE:
9729
        disk_addremove += 1
9730
        continue
9731
      elif disk_op == constants.DDM_ADD:
9732
        disk_addremove += 1
9733
      else:
9734
        if not isinstance(disk_op, int):
9735
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9736
        if not isinstance(disk_dict, dict):
9737
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9738
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9739

    
9740
      if disk_op == constants.DDM_ADD:
9741
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9742
        if mode not in constants.DISK_ACCESS_SET:
9743
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9744
                                     errors.ECODE_INVAL)
9745
        size = disk_dict.get(constants.IDISK_SIZE, None)
9746
        if size is None:
9747
          raise errors.OpPrereqError("Required disk parameter size missing",
9748
                                     errors.ECODE_INVAL)
9749
        try:
9750
          size = int(size)
9751
        except (TypeError, ValueError), err:
9752
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9753
                                     str(err), errors.ECODE_INVAL)
9754
        disk_dict[constants.IDISK_SIZE] = size
9755
      else:
9756
        # modification of disk
9757
        if constants.IDISK_SIZE in disk_dict:
9758
          raise errors.OpPrereqError("Disk size change not possible, use"
9759
                                     " grow-disk", errors.ECODE_INVAL)
9760

    
9761
    if disk_addremove > 1:
9762
      raise errors.OpPrereqError("Only one disk add or remove operation"
9763
                                 " supported at a time", errors.ECODE_INVAL)
9764

    
9765
    if self.op.disks and self.op.disk_template is not None:
9766
      raise errors.OpPrereqError("Disk template conversion and other disk"
9767
                                 " changes not supported at the same time",
9768
                                 errors.ECODE_INVAL)
9769

    
9770
    if (self.op.disk_template and
9771
        self.op.disk_template in constants.DTS_INT_MIRROR and
9772
        self.op.remote_node is None):
9773
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9774
                                 " one requires specifying a secondary node",
9775
                                 errors.ECODE_INVAL)
9776

    
9777
    # NIC validation
9778
    nic_addremove = 0
9779
    for nic_op, nic_dict in self.op.nics:
9780
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9781
      if nic_op == constants.DDM_REMOVE:
9782
        nic_addremove += 1
9783
        continue
9784
      elif nic_op == constants.DDM_ADD:
9785
        nic_addremove += 1
9786
      else:
9787
        if not isinstance(nic_op, int):
9788
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9789
        if not isinstance(nic_dict, dict):
9790
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9791
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9792

    
9793
      # nic_dict should be a dict
9794
      nic_ip = nic_dict.get(constants.INIC_IP, None)
9795
      if nic_ip is not None:
9796
        if nic_ip.lower() == constants.VALUE_NONE:
9797
          nic_dict[constants.INIC_IP] = None
9798
        else:
9799
          if not netutils.IPAddress.IsValid(nic_ip):
9800
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9801
                                       errors.ECODE_INVAL)
9802

    
9803
      nic_bridge = nic_dict.get('bridge', None)
9804
      nic_link = nic_dict.get(constants.INIC_LINK, None)
9805
      if nic_bridge and nic_link:
9806
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9807
                                   " at the same time", errors.ECODE_INVAL)
9808
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9809
        nic_dict['bridge'] = None
9810
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9811
        nic_dict[constants.INIC_LINK] = None
9812

    
9813
      if nic_op == constants.DDM_ADD:
9814
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
9815
        if nic_mac is None:
9816
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9817

    
9818
      if constants.INIC_MAC in nic_dict:
9819
        nic_mac = nic_dict[constants.INIC_MAC]
9820
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9821
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9822

    
9823
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9824
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9825
                                     " modifying an existing nic",
9826
                                     errors.ECODE_INVAL)
9827

    
9828
    if nic_addremove > 1:
9829
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9830
                                 " supported at a time", errors.ECODE_INVAL)
9831

    
9832
  def ExpandNames(self):
9833
    self._ExpandAndLockInstance()
9834
    self.needed_locks[locking.LEVEL_NODE] = []
9835
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9836

    
9837
  def DeclareLocks(self, level):
9838
    if level == locking.LEVEL_NODE:
9839
      self._LockInstancesNodes()
9840
      if self.op.disk_template and self.op.remote_node:
9841
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9842
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9843

    
9844
  def BuildHooksEnv(self):
9845
    """Build hooks env.
9846

9847
    This runs on the master, primary and secondaries.
9848

9849
    """
9850
    args = dict()
9851
    if constants.BE_MEMORY in self.be_new:
9852
      args['memory'] = self.be_new[constants.BE_MEMORY]
9853
    if constants.BE_VCPUS in self.be_new:
9854
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9855
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9856
    # information at all.
9857
    if self.op.nics:
9858
      args['nics'] = []
9859
      nic_override = dict(self.op.nics)
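      # Example (illustrative): self.op.nics == [(0, {...}),
      # (constants.DDM_ADD, {...})] turns into {0: {...},
      # constants.DDM_ADD: {...}}, so per-index overrides and a pending NIC
      # addition can both be looked up below.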
9860
      for idx, nic in enumerate(self.instance.nics):
9861
        if idx in nic_override:
9862
          this_nic_override = nic_override[idx]
9863
        else:
9864
          this_nic_override = {}
9865
        if constants.INIC_IP in this_nic_override:
9866
          ip = this_nic_override[constants.INIC_IP]
9867
        else:
9868
          ip = nic.ip
9869
        if constants.INIC_MAC in this_nic_override:
9870
          mac = this_nic_override[constants.INIC_MAC]
9871
        else:
9872
          mac = nic.mac
9873
        if idx in self.nic_pnew:
9874
          nicparams = self.nic_pnew[idx]
9875
        else:
9876
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9877
        mode = nicparams[constants.NIC_MODE]
9878
        link = nicparams[constants.NIC_LINK]
9879
        args['nics'].append((ip, mac, mode, link))
9880
      if constants.DDM_ADD in nic_override:
9881
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9882
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9883
        nicparams = self.nic_pnew[constants.DDM_ADD]
9884
        mode = nicparams[constants.NIC_MODE]
9885
        link = nicparams[constants.NIC_LINK]
9886
        args['nics'].append((ip, mac, mode, link))
9887
      elif constants.DDM_REMOVE in nic_override:
9888
        del args['nics'][-1]
9889

    
9890
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9891
    if self.op.disk_template:
9892
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9893

    
9894
    return env
9895

    
9896
  def BuildHooksNodes(self):
9897
    """Build hooks nodes.
9898

9899
    """
9900
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9901
    return (nl, nl)
9902

    
9903
  def CheckPrereq(self):
9904
    """Check prerequisites.
9905

9906
    This only checks the instance list against the existing names.
9907

9908
    """
9909
    # checking the new params on the primary/secondary nodes
9910

    
9911
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9912
    cluster = self.cluster = self.cfg.GetClusterInfo()
9913
    assert self.instance is not None, \
9914
      "Cannot retrieve locked instance %s" % self.op.instance_name
9915
    pnode = instance.primary_node
9916
    nodelist = list(instance.all_nodes)
9917

    
9918
    # OS change
9919
    if self.op.os_name and not self.op.force:
9920
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9921
                      self.op.force_variant)
9922
      instance_os = self.op.os_name
9923
    else:
9924
      instance_os = instance.os
9925

    
9926
    if self.op.disk_template:
9927
      if instance.disk_template == self.op.disk_template:
9928
        raise errors.OpPrereqError("Instance already has disk template %s" %
9929
                                   instance.disk_template, errors.ECODE_INVAL)
9930

    
9931
      if (instance.disk_template,
9932
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9933
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9934
                                   " %s to %s" % (instance.disk_template,
9935
                                                  self.op.disk_template),
9936
                                   errors.ECODE_INVAL)
9937
      _CheckInstanceDown(self, instance, "cannot change disk template")
9938
      if self.op.disk_template in constants.DTS_INT_MIRROR:
9939
        if self.op.remote_node == pnode:
9940
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9941
                                     " as the primary node of the instance" %
9942
                                     self.op.remote_node, errors.ECODE_STATE)
9943
        _CheckNodeOnline(self, self.op.remote_node)
9944
        _CheckNodeNotDrained(self, self.op.remote_node)
9945
        # FIXME: here we assume that the old instance type is DT_PLAIN
9946
        assert instance.disk_template == constants.DT_PLAIN
9947
        disks = [{constants.IDISK_SIZE: d.size,
9948
                  constants.IDISK_VG: d.logical_id[0]}
9949
                 for d in instance.disks]
9950
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9951
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9952

    
9953
    # hvparams processing
9954
    if self.op.hvparams:
9955
      hv_type = instance.hypervisor
9956
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9957
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9958
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9959

    
9960
      # local check
9961
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9962
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9963
      self.hv_new = hv_new # the new actual values
9964
      self.hv_inst = i_hvdict # the new dict (without defaults)
9965
    else:
9966
      self.hv_new = self.hv_inst = {}
9967

    
9968
    # beparams processing
9969
    if self.op.beparams:
9970
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9971
                                   use_none=True)
9972
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9973
      be_new = cluster.SimpleFillBE(i_bedict)
9974
      self.be_new = be_new # the new actual values
9975
      self.be_inst = i_bedict # the new dict (without defaults)
9976
    else:
9977
      self.be_new = self.be_inst = {}
9978

    
9979
    # osparams processing
9980
    if self.op.osparams:
9981
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9982
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9983
      self.os_inst = i_osdict # the new dict (without defaults)
9984
    else:
9985
      self.os_inst = {}
9986

    
9987
    self.warn = []
9988

    
9989
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9990
      mem_check_list = [pnode]
9991
      if be_new[constants.BE_AUTO_BALANCE]:
9992
        # either we changed auto_balance to yes or it was from before
9993
        mem_check_list.extend(instance.secondary_nodes)
9994
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9995
                                                  instance.hypervisor)
9996
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9997
                                         instance.hypervisor)
9998
      pninfo = nodeinfo[pnode]
9999
      msg = pninfo.fail_msg
10000
      if msg:
10001
        # Assume the primary node is unreachable and go ahead
10002
        self.warn.append("Can't get info from primary node %s: %s" %
10003
                         (pnode, msg))
10004
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
10005
        self.warn.append("Node data from primary node %s doesn't contain"
10006
                         " free memory information" % pnode)
10007
      elif instance_info.fail_msg:
10008
        self.warn.append("Can't get instance runtime information: %s" %
10009
                        instance_info.fail_msg)
10010
      else:
10011
        if instance_info.payload:
10012
          current_mem = int(instance_info.payload['memory'])
10013
        else:
10014
          # Assume instance not running
10015
          # (there is a slight race condition here, but it's not very probable,
10016
          # and we have no other way to check)
10017
          current_mem = 0
10018
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10019
                    pninfo.payload['memory_free'])
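        # Worked example (illustrative): raising BE_MEMORY to 4096 MiB while
        # the instance currently uses 1024 MiB and the primary node reports
        # 2048 MiB free gives miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so
        # the prerequisite check below fails.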
10020
        if miss_mem > 0:
10021
          raise errors.OpPrereqError("This change will prevent the instance"
10022
                                     " from starting, due to %d MB of memory"
10023
                                     " missing on its primary node" % miss_mem,
10024
                                     errors.ECODE_NORES)
10025

    
10026
      if be_new[constants.BE_AUTO_BALANCE]:
10027
        for node, nres in nodeinfo.items():
10028
          if node not in instance.secondary_nodes:
10029
            continue
10030
          msg = nres.fail_msg
10031
          if msg:
10032
            self.warn.append("Can't get info from secondary node %s: %s" %
10033
                             (node, msg))
10034
          elif not isinstance(nres.payload.get('memory_free', None), int):
10035
            self.warn.append("Secondary node %s didn't return free"
10036
                             " memory information" % node)
10037
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10038
            self.warn.append("Not enough memory to failover instance to"
10039
                             " secondary node %s" % node)
10040

    
10041
    # NIC processing
10042
    self.nic_pnew = {}
10043
    self.nic_pinst = {}
10044
    for nic_op, nic_dict in self.op.nics:
10045
      if nic_op == constants.DDM_REMOVE:
10046
        if not instance.nics:
10047
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10048
                                     errors.ECODE_INVAL)
10049
        continue
10050
      if nic_op != constants.DDM_ADD:
10051
        # an existing nic
10052
        if not instance.nics:
10053
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10054
                                     " no NICs" % nic_op,
10055
                                     errors.ECODE_INVAL)
10056
        if nic_op < 0 or nic_op >= len(instance.nics):
10057
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10058
                                     " are 0 to %d" %
10059
                                     (nic_op, len(instance.nics) - 1),
10060
                                     errors.ECODE_INVAL)
10061
        old_nic_params = instance.nics[nic_op].nicparams
10062
        old_nic_ip = instance.nics[nic_op].ip
10063
      else:
10064
        old_nic_params = {}
10065
        old_nic_ip = None
10066

    
10067
      update_params_dict = dict([(key, nic_dict[key])
10068
                                 for key in constants.NICS_PARAMETERS
10069
                                 if key in nic_dict])
10070

    
10071
      if 'bridge' in nic_dict:
10072
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10073

    
10074
      new_nic_params = _GetUpdatedParams(old_nic_params,
10075
                                         update_params_dict)
10076
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10077
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10078
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10079
      self.nic_pinst[nic_op] = new_nic_params
10080
      self.nic_pnew[nic_op] = new_filled_nic_params
10081
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10082

    
10083
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10084
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10085
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10086
        if msg:
10087
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10088
          if self.op.force:
10089
            self.warn.append(msg)
10090
          else:
10091
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10092
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10093
        if constants.INIC_IP in nic_dict:
10094
          nic_ip = nic_dict[constants.INIC_IP]
10095
        else:
10096
          nic_ip = old_nic_ip
10097
        if nic_ip is None:
10098
          raise errors.OpPrereqError('Cannot set the nic ip to None'
10099
                                     ' on a routed nic', errors.ECODE_INVAL)
10100
      if constants.INIC_MAC in nic_dict:
10101
        nic_mac = nic_dict[constants.INIC_MAC]
10102
        if nic_mac is None:
10103
          raise errors.OpPrereqError('Cannot set the nic mac to None',
10104
                                     errors.ECODE_INVAL)
10105
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10106
          # otherwise generate the mac
10107
          nic_dict[constants.INIC_MAC] = \
10108
            self.cfg.GenerateMAC(self.proc.GetECId())
10109
        else:
10110
          # or validate/reserve the current one
10111
          try:
10112
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10113
          except errors.ReservationError:
10114
            raise errors.OpPrereqError("MAC address %s already in use"
10115
                                       " in cluster" % nic_mac,
10116
                                       errors.ECODE_NOTUNIQUE)
10117

    
10118
    # DISK processing
10119
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10120
      raise errors.OpPrereqError("Disk operations not supported for"
10121
                                 " diskless instances",
10122
                                 errors.ECODE_INVAL)
10123
    for disk_op, _ in self.op.disks:
10124
      if disk_op == constants.DDM_REMOVE:
10125
        if len(instance.disks) == 1:
10126
          raise errors.OpPrereqError("Cannot remove the last disk of"
10127
                                     " an instance", errors.ECODE_INVAL)
10128
        _CheckInstanceDown(self, instance, "cannot remove disks")
10129

    
10130
      if (disk_op == constants.DDM_ADD and
10131
          len(instance.disks) >= constants.MAX_DISKS):
10132
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10133
                                   " add more" % constants.MAX_DISKS,
10134
                                   errors.ECODE_STATE)
10135
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10136
        # an existing disk
10137
        if disk_op < 0 or disk_op >= len(instance.disks):
10138
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10139
                                     " are 0 to %d" %
10140
                                     (disk_op, len(instance.disks) - 1),
10141
                                     errors.ECODE_INVAL)
10142

    
10143
    return
10144

    
10145
  def _ConvertPlainToDrbd(self, feedback_fn):
10146
    """Converts an instance from plain to drbd.
10147

10148
    """
10149
    feedback_fn("Converting template to drbd")
10150
    instance = self.instance
10151
    pnode = instance.primary_node
10152
    snode = self.op.remote_node
10153

    
10154
    # create a fake disk info for _GenerateDiskTemplate
10155
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10156
                  constants.IDISK_VG: d.logical_id[0]}
10157
                 for d in instance.disks]
10158
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10159
                                      instance.name, pnode, [snode],
10160
                                      disk_info, None, None, 0, feedback_fn)
10161
    info = _GetInstanceInfoText(instance)
10162
    feedback_fn("Creating aditional volumes...")
10163
    # first, create the missing data and meta devices
10164
    for disk in new_disks:
10165
      # unfortunately this is... not too nice
10166
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10167
                            info, True)
10168
      for child in disk.children:
10169
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10170
    # at this stage, all new LVs have been created, we can rename the
10171
    # old ones
10172
    feedback_fn("Renaming original volumes...")
10173
    rename_list = [(o, n.children[0].logical_id)
10174
                   for (o, n) in zip(instance.disks, new_disks)]
10175
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10176
    result.Raise("Failed to rename original LVs")
10177

    
10178
    feedback_fn("Initializing DRBD devices...")
10179
    # all child devices are in place, we can now create the DRBD devices
10180
    for disk in new_disks:
10181
      for node in [pnode, snode]:
10182
        f_create = node == pnode
10183
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10184

    
10185
    # at this point, the instance has been modified
10186
    instance.disk_template = constants.DT_DRBD8
10187
    instance.disks = new_disks
10188
    self.cfg.Update(instance, feedback_fn)
10189

    
10190
    # disks are created, waiting for sync
10191
    disk_abort = not _WaitForSync(self, instance)
10192
    if disk_abort:
10193
      raise errors.OpExecError("There are some degraded disks for"
10194
                               " this instance, please cleanup manually")
10195

    
10196
  def _ConvertDrbdToPlain(self, feedback_fn):
10197
    """Converts an instance from drbd to plain.
10198

10199
    """
10200
    instance = self.instance
10201
    assert len(instance.secondary_nodes) == 1
10202
    pnode = instance.primary_node
10203
    snode = instance.secondary_nodes[0]
10204
    feedback_fn("Converting template to plain")
10205

    
10206
    old_disks = instance.disks
10207
    new_disks = [d.children[0] for d in old_disks]
10208

    
10209
    # copy over size and mode
10210
    for parent, child in zip(old_disks, new_disks):
10211
      child.size = parent.size
10212
      child.mode = parent.mode
10213

    
10214
    # update instance structure
10215
    instance.disks = new_disks
10216
    instance.disk_template = constants.DT_PLAIN
10217
    self.cfg.Update(instance, feedback_fn)
10218

    
10219
    feedback_fn("Removing volumes on the secondary node...")
10220
    for disk in old_disks:
10221
      self.cfg.SetDiskID(disk, snode)
10222
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10223
      if msg:
10224
        self.LogWarning("Could not remove block device %s on node %s,"
10225
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10226

    
10227
    feedback_fn("Removing unneeded volumes on the primary node...")
10228
    for idx, disk in enumerate(old_disks):
10229
      meta = disk.children[1]
10230
      self.cfg.SetDiskID(meta, pnode)
10231
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10232
      if msg:
10233
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10234
                        " continuing anyway: %s", idx, pnode, msg)
10235

    
10236
  def Exec(self, feedback_fn):
10237
    """Modifies an instance.
10238

10239
    All parameters take effect only at the next restart of the instance.
10240

10241
    """
10242
    # Process here the warnings from CheckPrereq, as we don't have a
10243
    # feedback_fn there.
10244
    for warn in self.warn:
10245
      feedback_fn("WARNING: %s" % warn)
10246

    
10247
    result = []
10248
    instance = self.instance
10249
    # disk changes
10250
    for disk_op, disk_dict in self.op.disks:
10251
      if disk_op == constants.DDM_REMOVE:
10252
        # remove the last disk
10253
        device = instance.disks.pop()
10254
        device_idx = len(instance.disks)
10255
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10256
          self.cfg.SetDiskID(disk, node)
10257
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10258
          if msg:
10259
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10260
                            " continuing anyway", device_idx, node, msg)
10261
        result.append(("disk/%d" % device_idx, "remove"))
10262
      elif disk_op == constants.DDM_ADD:
10263
        # add a new disk
10264
        if instance.disk_template in (constants.DT_FILE,
10265
                                        constants.DT_SHARED_FILE):
10266
          file_driver, file_path = instance.disks[0].logical_id
10267
          file_path = os.path.dirname(file_path)
10268
        else:
10269
          file_driver = file_path = None
10270
        disk_idx_base = len(instance.disks)
10271
        new_disk = _GenerateDiskTemplate(self,
10272
                                         instance.disk_template,
10273
                                         instance.name, instance.primary_node,
10274
                                         instance.secondary_nodes,
10275
                                         [disk_dict],
10276
                                         file_path,
10277
                                         file_driver,
10278
                                         disk_idx_base, feedback_fn)[0]
10279
        instance.disks.append(new_disk)
10280
        info = _GetInstanceInfoText(instance)
10281

    
10282
        logging.info("Creating volume %s for instance %s",
10283
                     new_disk.iv_name, instance.name)
10284
        # Note: this needs to be kept in sync with _CreateDisks
10285
        #HARDCODE
10286
        for node in instance.all_nodes:
10287
          f_create = node == instance.primary_node
10288
          try:
10289
            _CreateBlockDev(self, node, instance, new_disk,
10290
                            f_create, info, f_create)
10291
          except errors.OpExecError, err:
10292
            self.LogWarning("Failed to create volume %s (%s) on"
10293
                            " node %s: %s",
10294
                            new_disk.iv_name, new_disk, node, err)
10295
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10296
                       (new_disk.size, new_disk.mode)))
10297
      else:
10298
        # change a given disk
10299
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10300
        result.append(("disk.mode/%d" % disk_op,
10301
                       disk_dict[constants.IDISK_MODE]))
10302

    
10303
    if self.op.disk_template:
10304
      r_shut = _ShutdownInstanceDisks(self, instance)
10305
      if not r_shut:
10306
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10307
                                 " proceed with disk template conversion")
10308
      mode = (instance.disk_template, self.op.disk_template)
10309
      try:
10310
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10311
      except:
10312
        self.cfg.ReleaseDRBDMinors(instance.name)
10313
        raise
10314
      result.append(("disk_template", self.op.disk_template))
10315

    
10316
    # NIC changes
10317
    for nic_op, nic_dict in self.op.nics:
10318
      if nic_op == constants.DDM_REMOVE:
10319
        # remove the last nic
10320
        del instance.nics[-1]
10321
        result.append(("nic.%d" % len(instance.nics), "remove"))
10322
      elif nic_op == constants.DDM_ADD:
10323
        # mac and bridge should be set by now
10324
        mac = nic_dict[constants.INIC_MAC]
10325
        ip = nic_dict.get(constants.INIC_IP, None)
10326
        nicparams = self.nic_pinst[constants.DDM_ADD]
10327
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10328
        instance.nics.append(new_nic)
10329
        result.append(("nic.%d" % (len(instance.nics) - 1),
10330
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10331
                       (new_nic.mac, new_nic.ip,
10332
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10333
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10334
                       )))
10335
      else:
10336
        for key in (constants.INIC_MAC, constants.INIC_IP):
10337
          if key in nic_dict:
10338
            setattr(instance.nics[nic_op], key, nic_dict[key])
10339
        if nic_op in self.nic_pinst:
10340
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10341
        for key, val in nic_dict.iteritems():
10342
          result.append(("nic.%s/%d" % (key, nic_op), val))
10343

    
10344
    # hvparams changes
10345
    if self.op.hvparams:
10346
      instance.hvparams = self.hv_inst
10347
      for key, val in self.op.hvparams.iteritems():
10348
        result.append(("hv/%s" % key, val))
10349

    
10350
    # beparams changes
10351
    if self.op.beparams:
10352
      instance.beparams = self.be_inst
10353
      for key, val in self.op.beparams.iteritems():
10354
        result.append(("be/%s" % key, val))
10355

    
10356
    # OS change
10357
    if self.op.os_name:
10358
      instance.os = self.op.os_name
10359

    
10360
    # osparams changes
10361
    if self.op.osparams:
10362
      instance.osparams = self.os_inst
10363
      for key, val in self.op.osparams.iteritems():
10364
        result.append(("os/%s" % key, val))
10365

    
10366
    self.cfg.Update(instance, feedback_fn)
10367

    
10368
    return result
10369

    
10370
  _DISK_CONVERSIONS = {
10371
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10372
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10373
    }
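

# Illustrative sketch: the (operation, parameters) format that
# LUInstanceSetParams.CheckArguments above validates. The constants are the
# same ones used in that method; the helper itself is hypothetical.
def _ExampleSetParamsModifications():
  """Returns example disk and NIC modification lists (hedged example).

  """
  # add one 1 GiB read-write disk and change the IP of the first NIC
  disks = [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
                                constants.IDISK_MODE: constants.DISK_RDWR})]
  nics = [(0, {constants.INIC_IP: "192.0.2.10"})]
  return (disks, nics)
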
10374

    
10375

    
10376
class LUBackupQuery(NoHooksLU):
10377
  """Query the exports list
10378

10379
  """
10380
  REQ_BGL = False
10381

    
10382
  def ExpandNames(self):
10383
    self.needed_locks = {}
10384
    self.share_locks[locking.LEVEL_NODE] = 1
10385
    if not self.op.nodes:
10386
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10387
    else:
10388
      self.needed_locks[locking.LEVEL_NODE] = \
10389
        _GetWantedNodes(self, self.op.nodes)
10390

    
10391
  def Exec(self, feedback_fn):
10392
    """Compute the list of all the exported system images.
10393

10394
    @rtype: dict
10395
    @return: a dictionary with the structure node->(export-list)
10396
        where export-list is a list of the instances exported on
10397
        that node.
10398

10399
    """
10400
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10401
    rpcresult = self.rpc.call_export_list(self.nodes)
10402
    result = {}
10403
    for node in rpcresult:
10404
      if rpcresult[node].fail_msg:
10405
        result[node] = False
10406
      else:
10407
        result[node] = rpcresult[node].payload
10408

    
10409
    return result
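

# Illustrative sketch: filtering the node->export-list mapping returned by
# LUBackupQuery.Exec above, where a value of False marks a node that could
# not be queried. The helper itself is hypothetical.
def _ExampleNodesHoldingExport(export_map, instance_name):
  """Returns the nodes that hold an export for the given instance.

  """
  return [node for (node, exports) in export_map.items()
          if exports is not False and instance_name in exports]
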
10410

    
10411

    
10412
class LUBackupPrepare(NoHooksLU):
10413
  """Prepares an instance for an export and returns useful information.
10414

10415
  """
10416
  REQ_BGL = False
10417

    
10418
  def ExpandNames(self):
10419
    self._ExpandAndLockInstance()
10420

    
10421
  def CheckPrereq(self):
10422
    """Check prerequisites.
10423

10424
    """
10425
    instance_name = self.op.instance_name
10426

    
10427
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10428
    assert self.instance is not None, \
10429
          "Cannot retrieve locked instance %s" % self.op.instance_name
10430
    _CheckNodeOnline(self, self.instance.primary_node)
10431

    
10432
    self._cds = _GetClusterDomainSecret()
10433

    
10434
  def Exec(self, feedback_fn):
10435
    """Prepares an instance for an export.
10436

10437
    """
10438
    instance = self.instance
10439

    
10440
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10441
      salt = utils.GenerateSecret(8)
10442

    
10443
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10444
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10445
                                              constants.RIE_CERT_VALIDITY)
10446
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10447

    
10448
      (name, cert_pem) = result.payload
10449

    
10450
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10451
                                             cert_pem)
10452

    
10453
      return {
10454
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10455
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10456
                          salt),
10457
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10458
        }
10459

    
10460
    return None
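

# Illustrative sketch: re-checking the HMAC-protected X509 key name produced
# by LUBackupPrepare above, in the same way LUBackupExport.CheckPrereq below
# does. The cluster domain secret would normally come from
# _GetClusterDomainSecret(); the helper itself is hypothetical.
def _ExampleVerifyPreparedKeyName(cds, x509_key_name):
  """Verifies a (name, hmac, salt) tuple against the domain secret.

  """
  (key_name, hmac_digest, hmac_salt) = x509_key_name
  return utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt)
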
10461

    
10462

    
10463
class LUBackupExport(LogicalUnit):
10464
  """Export an instance to an image in the cluster.
10465

10466
  """
10467
  HPATH = "instance-export"
10468
  HTYPE = constants.HTYPE_INSTANCE
10469
  REQ_BGL = False
10470

    
10471
  def CheckArguments(self):
10472
    """Check the arguments.
10473

10474
    """
10475
    self.x509_key_name = self.op.x509_key_name
10476
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10477

    
10478
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10479
      if not self.x509_key_name:
10480
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10481
                                   errors.ECODE_INVAL)
10482

    
10483
      if not self.dest_x509_ca_pem:
10484
        raise errors.OpPrereqError("Missing destination X509 CA",
10485
                                   errors.ECODE_INVAL)
10486

    
10487
  def ExpandNames(self):
10488
    self._ExpandAndLockInstance()
10489

    
10490
    # Lock all nodes for local exports
10491
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10492
      # FIXME: lock only instance primary and destination node
10493
      #
10494
      # Sad but true, for now we have to lock all nodes, as we don't know where
10495
      # the previous export might be, and in this LU we search for it and
10496
      # remove it from its current node. In the future we could fix this by:
10497
      #  - making a tasklet to search (share-lock all), then create the
10498
      #    new one, then one to remove, after
10499
      #  - removing the removal operation altogether
10500
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10501

    
10502
  def DeclareLocks(self, level):
10503
    """Last minute lock declaration."""
10504
    # All nodes are locked anyway, so nothing to do here.
10505

    
10506
  def BuildHooksEnv(self):
10507
    """Build hooks env.
10508

10509
    This will run on the master, primary node and target node.
10510

10511
    """
10512
    env = {
10513
      "EXPORT_MODE": self.op.mode,
10514
      "EXPORT_NODE": self.op.target_node,
10515
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10516
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10517
      # TODO: Generic function for boolean env variables
10518
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10519
      }
10520

    
10521
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10522

    
10523
    return env
10524

    
10525
  def BuildHooksNodes(self):
10526
    """Build hooks nodes.
10527

10528
    """
10529
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10530

    
10531
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10532
      nl.append(self.op.target_node)
10533

    
10534
    return (nl, nl)
10535

    
10536
  def CheckPrereq(self):
10537
    """Check prerequisites.
10538

10539
    This checks that the instance and node names are valid.
10540

10541
    """
10542
    instance_name = self.op.instance_name
10543

    
10544
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10545
    assert self.instance is not None, \
10546
          "Cannot retrieve locked instance %s" % self.op.instance_name
10547
    _CheckNodeOnline(self, self.instance.primary_node)
10548

    
10549
    if (self.op.remove_instance and self.instance.admin_up and
10550
        not self.op.shutdown):
10551
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10552
                                 " down before")
10553

    
10554
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10555
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10556
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10557
      assert self.dst_node is not None
10558

    
10559
      _CheckNodeOnline(self, self.dst_node.name)
10560
      _CheckNodeNotDrained(self, self.dst_node.name)
10561

    
10562
      self._cds = None
10563
      self.dest_disk_info = None
10564
      self.dest_x509_ca = None
10565

    
10566
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10567
      self.dst_node = None
10568

    
10569
      if len(self.op.target_node) != len(self.instance.disks):
10570
        raise errors.OpPrereqError(("Received destination information for %s"
10571
                                    " disks, but instance %s has %s disks") %
10572
                                   (len(self.op.target_node), instance_name,
10573
                                    len(self.instance.disks)),
10574
                                   errors.ECODE_INVAL)
10575

    
10576
      cds = _GetClusterDomainSecret()
10577

    
10578
      # Check X509 key name
10579
      try:
10580
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10581
      except (TypeError, ValueError), err:
10582
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10583

    
10584
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10585
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10586
                                   errors.ECODE_INVAL)
10587

    
10588
      # Load and verify CA
10589
      try:
10590
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10591
      except OpenSSL.crypto.Error, err:
10592
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10593
                                   (err, ), errors.ECODE_INVAL)
10594

    
10595
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10596
      if errcode is not None:
10597
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10598
                                   (msg, ), errors.ECODE_INVAL)
10599

    
10600
      self.dest_x509_ca = cert
10601

    
10602
      # Verify target information
10603
      disk_info = []
10604
      for idx, disk_data in enumerate(self.op.target_node):
10605
        try:
10606
          (host, port, magic) = \
10607
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10608
        except errors.GenericError, err:
10609
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10610
                                     (idx, err), errors.ECODE_INVAL)
10611

    
10612
        disk_info.append((host, port, magic))
10613

    
10614
      assert len(disk_info) == len(self.op.target_node)
10615
      self.dest_disk_info = disk_info
10616

    
10617
    else:
10618
      raise errors.ProgrammerError("Unhandled export mode %r" %
10619
                                   self.op.mode)
10620

    
10621
    # instance disk type verification
10622
    # TODO: Implement export support for file-based disks
10623
    for disk in self.instance.disks:
10624
      if disk.dev_type == constants.LD_FILE:
10625
        raise errors.OpPrereqError("Export not supported for instances with"
10626
                                   " file-based disks", errors.ECODE_INVAL)
10627

    
10628
  def _CleanupExports(self, feedback_fn):
10629
    """Removes exports of current instance from all other nodes.
10630

10631
    If an instance in a cluster with nodes A..D was exported to node C, its
10632
    exports will be removed from the nodes A, B and D.
10633

10634
    """
10635
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10636

    
10637
    nodelist = self.cfg.GetNodeList()
10638
    nodelist.remove(self.dst_node.name)
10639

    
10640
    # on one-node clusters nodelist will be empty after the removal
10641
    # if we proceed the backup would be removed because OpBackupQuery
10642
    # substitutes an empty list with the full cluster node list.
10643
    iname = self.instance.name
10644
    if nodelist:
10645
      feedback_fn("Removing old exports for instance %s" % iname)
10646
      exportlist = self.rpc.call_export_list(nodelist)
10647
      for node in exportlist:
10648
        if exportlist[node].fail_msg:
10649
          continue
10650
        if iname in exportlist[node].payload:
10651
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10652
          if msg:
10653
            self.LogWarning("Could not remove older export for instance %s"
10654
                            " on node %s: %s", iname, node, msg)
10655

    
10656
  def Exec(self, feedback_fn):
10657
    """Export an instance to an image in the cluster.
10658

10659
    """
10660
    assert self.op.mode in constants.EXPORT_MODES
10661

    
10662
    instance = self.instance
10663
    src_node = instance.primary_node
10664

    
10665
    if self.op.shutdown:
10666
      # shutdown the instance, but not the disks
10667
      feedback_fn("Shutting down instance %s" % instance.name)
10668
      result = self.rpc.call_instance_shutdown(src_node, instance,
10669
                                               self.op.shutdown_timeout)
10670
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10671
      result.Raise("Could not shutdown instance %s on"
10672
                   " node %s" % (instance.name, src_node))
10673

    
10674
    # set the disks ID correctly since call_instance_start needs the
10675
    # correct drbd minor to create the symlinks
10676
    for disk in instance.disks:
10677
      self.cfg.SetDiskID(disk, src_node)
10678

    
10679
    activate_disks = (not instance.admin_up)
10680

    
10681
    if activate_disks:
10682
      # Activate the instance disks if we're exporting a stopped instance
10683
      feedback_fn("Activating disks for %s" % instance.name)
10684
      _StartInstanceDisks(self, instance, None)
10685

    
10686
    try:
10687
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10688
                                                     instance)
10689

    
10690
      helper.CreateSnapshots()
10691
      try:
10692
        if (self.op.shutdown and instance.admin_up and
10693
            not self.op.remove_instance):
10694
          assert not activate_disks
10695
          feedback_fn("Starting instance %s" % instance.name)
10696
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10697
          msg = result.fail_msg
10698
          if msg:
10699
            feedback_fn("Failed to start instance: %s" % msg)
10700
            _ShutdownInstanceDisks(self, instance)
10701
            raise errors.OpExecError("Could not start instance: %s" % msg)
10702

    
10703
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10704
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10705
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10706
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10707
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10708

    
10709
          (key_name, _, _) = self.x509_key_name
10710

    
10711
          dest_ca_pem = \
10712
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10713
                                            self.dest_x509_ca)
10714

    
10715
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10716
                                                     key_name, dest_ca_pem,
10717
                                                     timeouts)
10718
      finally:
10719
        helper.Cleanup()
10720

    
10721
      # Check for backwards compatibility
10722
      assert len(dresults) == len(instance.disks)
10723
      assert compat.all(isinstance(i, bool) for i in dresults), \
10724
             "Not all results are boolean: %r" % dresults
10725

    
10726
    finally:
10727
      if activate_disks:
10728
        feedback_fn("Deactivating disks for %s" % instance.name)
10729
        _ShutdownInstanceDisks(self, instance)
10730

    
10731
    if not (compat.all(dresults) and fin_resu):
10732
      failures = []
10733
      if not fin_resu:
10734
        failures.append("export finalization")
10735
      if not compat.all(dresults):
10736
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10737
                               if not dsk)
10738
        failures.append("disk export: disk(s) %s" % fdsk)
10739

    
10740
      raise errors.OpExecError("Export failed, errors in %s" %
10741
                               utils.CommaJoin(failures))
10742

    
10743
    # At this point, the export was successful, we can cleanup/finish
10744

    
10745
    # Remove instance if requested
10746
    if self.op.remove_instance:
10747
      feedback_fn("Removing instance %s" % instance.name)
10748
      _RemoveInstance(self, feedback_fn, instance,
10749
                      self.op.ignore_remove_failures)
10750

    
10751
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10752
      self._CleanupExports(feedback_fn)
10753

    
10754
    return fin_resu, dresults
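

# Illustrative sketch: interpreting the (fin_resu, dresults) pair returned by
# LUBackupExport.Exec above. The helper itself is hypothetical.
def _ExampleExportSucceeded(fin_resu, dresults):
  """Tells whether the export finished and every disk was exported.

  """
  return bool(fin_resu) and compat.all(dresults)
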
10755

    
10756

    
10757
class LUBackupRemove(NoHooksLU):
10758
  """Remove exports related to the named instance.
10759

10760
  """
10761
  REQ_BGL = False
10762

    
10763
  def ExpandNames(self):
10764
    self.needed_locks = {}
10765
    # We need all nodes to be locked in order for RemoveExport to work, but we
10766
    # don't need to lock the instance itself, as nothing will happen to it (and
10767
    # we can remove exports also for a removed instance)
10768
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10769

    
10770
  def Exec(self, feedback_fn):
10771
    """Remove any export.
10772

10773
    """
10774
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10775
    # If the instance was not found we'll try with the name that was passed in.
10776
    # This will only work if it was an FQDN, though.
10777
    fqdn_warn = False
10778
    if not instance_name:
10779
      fqdn_warn = True
10780
      instance_name = self.op.instance_name
10781

    
10782
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
10783
    exportlist = self.rpc.call_export_list(locked_nodes)
10784
    found = False
10785
    for node in exportlist:
10786
      msg = exportlist[node].fail_msg
10787
      if msg:
10788
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10789
        continue
10790
      if instance_name in exportlist[node].payload:
10791
        found = True
10792
        result = self.rpc.call_export_remove(node, instance_name)
10793
        msg = result.fail_msg
10794
        if msg:
10795
          logging.error("Could not remove export for instance %s"
10796
                        " on node %s: %s", instance_name, node, msg)
10797

    
10798
    if fqdn_warn and not found:
10799
      feedback_fn("Export not found. If trying to remove an export belonging"
10800
                  " to a deleted instance please use its Fully Qualified"
10801
                  " Domain Name.")
10802

    
10803

    
10804
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


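# Hedged example of the add_locks pattern used above: because the group does
# not exist yet, ExpandNames() generates a fresh UUID and registers it in
# add_locks rather than needed_locks, so the lock is created and acquired in
# one step.  An invocation could look roughly like this (field values are
# illustrative):
#
#   op = opcodes.OpGroupAdd(group_name="rack2",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED)
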
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information.
    self.node_data = self.cfg.GetAllNodesInfo()
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
    affected_groups.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: list(affected_groups),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    about instances that were already split and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


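# Worked example for CheckAssignmentForSplitInstances() above, using mock
# objects (namedtuples stand in for the real node/instance objects; the node
# names and group UUIDs are made up):
#
#   from collections import namedtuple
#   Node = namedtuple("Node", ["group"])
#   Inst = namedtuple("Inst", ["name", "primary_node", "secondary_nodes",
#                              "disk_template"])
#   nodes = {"node1": Node("grp-A"), "node2": Node("grp-A")}
#   insts = {"i1": Inst("i1", "node1", ["node2"], constants.DT_DRBD8)}
#   LUGroupAssignNodes.CheckAssignmentForSplitInstances(
#       [("node2", "grp-B")], nodes, insts)
#   # -> (["i1"], []): i1 becomes newly split across grp-A and grp-B
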
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept entries as either group names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, since we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


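# Hedged usage sketch mirroring LUGroupQuery.CheckArguments() above (the field
# names are illustrative):
#
#   gq = _GroupQuery(qlang.MakeSimpleFilter("name", ["group1"]),
#                    ["name", "node_cnt"], False)
#   gq.ExpandNames(lu)
#
# Group arguments may be given either as names or as UUIDs; names are resolved
# through name_to_uuid, UUIDs are accepted as-is, and unknown entries raise
# OpPrereqError with ECODE_NOENT.
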
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


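# Hedged example for the ndparams handling above: the dictionary passed in the
# opcode is merged over the group's current ndparams by _GetUpdatedParams, so a
# partial update such as the following (opcode fields and the program path are
# illustrative) only touches the given key:
#
#   op = opcodes.OpGroupSetParams(
#       group_name="rack2",
#       ndparams={constants.ND_OOB_PROGRAM: "/usr/local/bin/oob"})
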
class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


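# Illustrative result shape for the tag search above (the names and tags are
# made up): a pattern such as "web" could return
#
#   [("/cluster", "webfarm"),
#    ("/instances/inst1.example.com", "webserver")]
#
# i.e. (path, tag) pairs where the path encodes the kind and name of the
# tagged object, exactly as assembled in LUTagsSearch.Exec() above.
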
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tags.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


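# Minimal client-side counterpart to _NotifyUsingSocket() above (a hedged
# sketch for test drivers, not part of Ganeti itself): the test receives the
# socket path through the ELOG_JQUEUE_TEST log entry, connects within
# _CLIENT_CONNECT_TIMEOUT and simply closes the connection to confirm:
#
#   import socket
#   conn = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   conn.connect(sockname)  # sockname taken from the job's log entry
#   conn.close()            # the server's recv(1) then returns
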
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has several sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

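  # Rough shape of the structure assembled above and completed later by
  # _BuildInputData (values are illustrative and trimmed):
  #
  #   {"version": 2, "cluster_name": "cluster.example.com",
  #    "cluster_tags": [], "enabled_hypervisors": ["xen-pvm"],
  #    "nodegroups": {...}, "nodes": {...}, "instances": {...},
  #    "request": {...}}   # "request" is added by _BuildInputData()
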
  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node names to dicts of static (config-based)
      node attributes

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

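  # Worked example of the memory adjustment above (the numbers are
  # illustrative): if an instance has BE_MEMORY=1024 MiB configured but the
  # hypervisor currently reports only 512 MiB in use for it, memory_free is
  # reduced by max(0, 1024 - 512) = 512 MiB, so the allocator sees the memory
  # the instance could grow back to rather than only what it uses right now.
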
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

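  # Illustrative "request" section produced by _AddNewInstance() and completed
  # by _BuildInputData() for a new internally-mirrored instance (all values
  # are made up):
  #
  #   {"type": "allocate", "name": "inst1.example.com",
  #    "disk_template": "drbd8", "disks": [{"size": 10240, "mode": "w"}],
  #    "disk_space_total": 10368, "memory": 512, "vcpus": 1,
  #    "os": "debootstrap", "tags": [], "nics": [...], "required_nodes": 2}
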
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      request_groups = fn(self.relocate_from)
      result_groups = fn(rdict["result"])

      if result_groups != request_groups:
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


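# Worked example for IAllocator._NodesToGroups() above (names and UUIDs are
# made up):
#
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "group1"}}
#   IAllocator._NodesToGroups(node2group, groups, ["node1", "node2", "ghost"])
#   # -> ["group1", "uuid-b"]: the unknown group falls back to its UUID, the
#   #    unknown node "ghost" is ignored, and the result is sorted
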
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
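
# Example (illustrative): _GetQueryImplementation(constants.QR_GROUP) returns
# the _GroupQuery class defined above, while an unknown resource name raises
# OpPrereqError with ECODE_INVAL.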