
root / lib / cmdlib.py @ 4463ae96


#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes, an empty list (and not None) should be returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # 'could be a function' warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


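# Illustrative sketch (not part of the original module): a minimal LU that
# follows the rules from the LogicalUnit docstring above.  The LU name is
# hypothetical, and real LUs are additionally dispatched by opcode through
# mcpu, which is outside this sketch.
#
#   class LUExampleNoop(LogicalUnit):
#     """Example LU that locks one instance and reports its name."""
#     HPATH = None       # no hooks, so BuildHooksEnv is never called
#     HTYPE = None
#     _OP_REQP = ["instance_name"]
#     REQ_BGL = False    # concurrent LU, must declare its own locks
#
#     def ExpandNames(self):
#       # expands self.op.instance_name and declares the instance-level lock
#       self._ExpandAndLockInstance()
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Doing nothing for %s" % self.instance.name)
#       return self.instance.name

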
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


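# Illustrative sketch (not part of the original module): how an LU can be
# built from tasklets.  The tasklet and opcode field names are hypothetical;
# the only contract used here is that LogicalUnit.CheckPrereq and Exec iterate
# over self.tasklets when it is not None (see LogicalUnit above).
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Would operate on %s" % self.instance.name)
#
#   # inside some LU's ExpandNames, after its locks have been declared:
#   #   self.tasklets = [_ExampleTasklet(self, self.op.instance_name)]

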
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if val == constants.VALUE_DEFAULT:
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


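# Illustrative example (not part of the original module) of the reset
# behaviour documented in _GetUpdatedParams: passing constants.VALUE_DEFAULT
# drops the key so the cluster-level default applies again.  The parameter
# names and values here are made up.
#
#   _GetUpdatedParams({"acpi": False, "boot_order": "cd"},
#                     {"acpi": constants.VALUE_DEFAULT,
#                      "vnc_bind_address": "0.0.0.0"})
#   # returns {'boot_order': 'cd', 'vnc_bind_address': '0.0.0.0'}
#   # (the input dict is deep-copied and left unchanged)

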
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


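# Illustrative example (not part of the original module): for an instance
# with one bridged NIC and one disk, _BuildInstanceHookEnv produces keys such
# as the following (values are made up):
#
#   OP_TARGET=inst1.example.com       INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_NIC_COUNT=1              INSTANCE_NIC0_MODE=bridged
#   INSTANCE_NIC0_LINK=xen-br0        INSTANCE_NIC0_BRIDGE=xen-br0
#   INSTANCE_DISK_COUNT=1             INSTANCE_DISK0_SIZE=10240
#
# The hooks runner later prefixes each key with "GANETI_", as noted in
# LogicalUnit.BuildHooksEnv above.

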
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


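# Illustrative example (not part of the original module): with an OS object
# whose supported_variants is ["squeeze", "wheezy"], a user-supplied name of
# "debootstrap+squeeze" passes _CheckOSVariant, while "debootstrap" (no
# variant given) or "debootstrap+sid" (unknown variant) raise OpPrereqError.
# The OS and variant names above are made up for the example.

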
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
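  """Return the list of faulty disk indices for an instance.

  Queries node_name for the mirror status of all the instance's disks and
  collects the indices whose ldisk status is L{constants.LDS_FAULTY}.

  """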
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


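# Illustrative example (not part of the original module): _VerifyCertificate
# returns (None, None) for a healthy certificate, (LUVerifyCluster.ETYPE_WARNING,
# "While verifying <path>: ...") when the expiration warning threshold is
# reached, and (LUVerifyCluster.ETYPE_ERROR, "...") for unloadable or expired
# certificates, following the (errcode, msg) pairs produced by
# utils.VerifyX509Certificate above.

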
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

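  # Illustrative example (not part of the original module): for
  # ecode=self.ENODELVM, item="node1", msg="unable to check volume groups",
  # the two formats produced by _Error above look like (values made up):
  #
  #   with op.error_codes:    ERROR:ENODELVM:node:node1:unable to check volume groups
  #   without op.error_codes: ERROR: node node1: unable to check volume groups
  #
  # Either way the line is reported through feedback_fn with a "  - " prefix.
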
  def _ErrorIf(self, cond, *args, **kwargs):
1194
    """Log an error message if the passed condition is True.
1195

1196
    """
1197
    cond = bool(cond) or self.op.debug_simulate_errors
1198
    if cond:
1199
      self._Error(*args, **kwargs)
1200
    # do not mark the operation as failed for WARN cases only
1201
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1202
      self.bad = self.bad or cond
1203

    
1204
  def _VerifyNode(self, ninfo, nresult):
1205
    """Run multiple tests against a node.
1206

1207
    Test list:
1208

1209
      - compares ganeti version
1210
      - checks vg existence and size > 20G
1211
      - checks config file checksum
1212
      - checks ssh to other nodes
1213

1214
    @type ninfo: L{objects.Node}
1215
    @param ninfo: the node to check
1216
    @param nresult: the results from the node
1217
    @rtype: boolean
1218
    @return: whether overall this call was successful (and we can expect
1219
         reasonable values in the respose)
1220

1221
    """
1222
    node = ninfo.name
1223
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1224

    
1225
    # main result, nresult should be a non-empty dict
1226
    test = not nresult or not isinstance(nresult, dict)
1227
    _ErrorIf(test, self.ENODERPC, node,
1228
                  "unable to verify node: no data returned")
1229
    if test:
1230
      return False
1231

    
1232
    # compares ganeti version
1233
    local_version = constants.PROTOCOL_VERSION
1234
    remote_version = nresult.get("version", None)
1235
    test = not (remote_version and
1236
                isinstance(remote_version, (list, tuple)) and
1237
                len(remote_version) == 2)
1238
    _ErrorIf(test, self.ENODERPC, node,
1239
             "connection to node returned invalid data")
1240
    if test:
1241
      return False
1242

    
1243
    test = local_version != remote_version[0]
1244
    _ErrorIf(test, self.ENODEVERSION, node,
1245
             "incompatible protocol versions: master %s,"
1246
             " node %s", local_version, remote_version[0])
1247
    if test:
1248
      return False
1249

    
1250
    # node seems compatible, we can actually try to look into its results
1251

    
1252
    # full package version
1253
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1254
                  self.ENODEVERSION, node,
1255
                  "software version mismatch: master %s, node %s",
1256
                  constants.RELEASE_VERSION, remote_version[1],
1257
                  code=self.ETYPE_WARNING)
1258

    
1259
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1260
    if isinstance(hyp_result, dict):
1261
      for hv_name, hv_result in hyp_result.iteritems():
1262
        test = hv_result is not None
1263
        _ErrorIf(test, self.ENODEHV, node,
1264
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1265

    
1266

    
1267
    test = nresult.get(constants.NV_NODESETUP,
1268
                           ["Missing NODESETUP results"])
1269
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1270
             "; ".join(test))
1271

    
1272
    return True
1273

    
1274
  def _VerifyNodeTime(self, ninfo, nresult,
1275
                      nvinfo_starttime, nvinfo_endtime):
1276
    """Check the node time.
1277

1278
    @type ninfo: L{objects.Node}
1279
    @param ninfo: the node to check
1280
    @param nresult: the remote results for the node
1281
    @param nvinfo_starttime: the start time of the RPC call
1282
    @param nvinfo_endtime: the end time of the RPC call
1283

1284
    """
1285
    node = ninfo.name
1286
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1287

    
1288
    ntime = nresult.get(constants.NV_TIME, None)
1289
    try:
1290
      ntime_merged = utils.MergeTime(ntime)
1291
    except (ValueError, TypeError):
1292
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1293
      return
1294

    
1295
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1296
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1297
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1298
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1299
    else:
1300
      ntime_diff = None
1301

    
1302
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1303
             "Node time diverges by at least %s from master node time",
1304
             ntime_diff)
1305

    
1306
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1307
    """Check the node time.
1308

1309
    @type ninfo: L{objects.Node}
1310
    @param ninfo: the node to check
1311
    @param nresult: the remote results for the node
1312
    @param vg_name: the configured VG name
1313

1314
    """
1315
    if vg_name is None:
1316
      return
1317

    
1318
    node = ninfo.name
1319
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1320

    
1321
    # checks vg existence and size > 20G
1322
    vglist = nresult.get(constants.NV_VGLIST, None)
1323
    test = not vglist
1324
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1325
    if not test:
1326
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1327
                                            constants.MIN_VG_SIZE)
1328
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1329

    
1330
    # check pv names
1331
    pvlist = nresult.get(constants.NV_PVLIST, None)
1332
    test = pvlist is None
1333
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1334
    if not test:
1335
      # check that ':' is not present in PV names, since it's a
1336
      # special character for lvcreate (denotes the range of PEs to
1337
      # use on the PV)
1338
      for _, pvname, owner_vg in pvlist:
1339
        test = ":" in pvname
1340
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1341
                 " '%s' of VG '%s'", pvname, owner_vg)
1342

    
1343
  def _VerifyNodeNetwork(self, ninfo, nresult):
1344
    """Check the node time.
1345

1346
    @type ninfo: L{objects.Node}
1347
    @param ninfo: the node to check
1348
    @param nresult: the remote results for the node
1349

1350
    """
1351
    node = ninfo.name
1352
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1353

    
1354
    test = constants.NV_NODELIST not in nresult
1355
    _ErrorIf(test, self.ENODESSH, node,
1356
             "node hasn't returned node ssh connectivity data")
1357
    if not test:
1358
      if nresult[constants.NV_NODELIST]:
1359
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1360
          _ErrorIf(True, self.ENODESSH, node,
1361
                   "ssh communication with node '%s': %s", a_node, a_msg)
1362

    
1363
    test = constants.NV_NODENETTEST not in nresult
1364
    _ErrorIf(test, self.ENODENET, node,
1365
             "node hasn't returned node tcp connectivity data")
1366
    if not test:
1367
      if nresult[constants.NV_NODENETTEST]:
1368
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1369
        for anode in nlist:
1370
          _ErrorIf(True, self.ENODENET, node,
1371
                   "tcp communication with node '%s': %s",
1372
                   anode, nresult[constants.NV_NODENETTEST][anode])
1373

    
1374
    test = constants.NV_MASTERIP not in nresult
1375
    _ErrorIf(test, self.ENODENET, node,
1376
             "node hasn't returned node master IP reachability data")
1377
    if not test:
1378
      if not nresult[constants.NV_MASTERIP]:
1379
        if node == self.master_node:
1380
          msg = "the master node cannot reach the master IP (not configured?)"
1381
        else:
1382
          msg = "cannot reach the master IP"
1383
        _ErrorIf(True, self.ENODENET, node, msg)
1384

    
1385

    
1386
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1387
    """Verify an instance.
1388

1389
    This function checks to see if the required block devices are
1390
    available on the instance's node.
1391

1392
    """
1393
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1394
    node_current = instanceconfig.primary_node
1395

    
1396
    node_vol_should = {}
1397
    instanceconfig.MapLVsByNode(node_vol_should)
1398

    
1399
    for node in node_vol_should:
1400
      n_img = node_image[node]
1401
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1402
        # ignore missing volumes on offline or broken nodes
1403
        continue
1404
      for volume in node_vol_should[node]:
1405
        test = volume not in n_img.volumes
1406
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1407
                 "volume %s missing on node %s", volume, node)
1408

    
1409
    if instanceconfig.admin_up:
1410
      pri_img = node_image[node_current]
1411
      test = instance not in pri_img.instances and not pri_img.offline
1412
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1413
               "instance not running on its primary node %s",
1414
               node_current)
1415

    
1416
    for node, n_img in node_image.items():
1417
      if (not node == node_current):
1418
        test = instance in n_img.instances
1419
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1420
                 "instance should not run on node %s", node)
1421

    
1422
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1423
    """Verify if there are any unknown volumes in the cluster.
1424

1425
    The .os, .swap and backup volumes are ignored. All other volumes are
1426
    reported as unknown.
1427

1428
    """
1429
    for node, n_img in node_image.items():
1430
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1431
        # skip non-healthy nodes
1432
        continue
1433
      for volume in n_img.volumes:
1434
        test = (node not in node_vol_should or
1435
                volume not in node_vol_should[node])
1436
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1437
                      "volume %s is unknown", volume)
1438

    
1439
  def _VerifyOrphanInstances(self, instancelist, node_image):
1440
    """Verify the list of running instances.
1441

1442
    This checks what instances are running but unknown to the cluster.
1443

1444
    """
1445
    for node, n_img in node_image.items():
1446
      for o_inst in n_img.instances:
1447
        test = o_inst not in instancelist
1448
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1449
                      "instance %s on node %s should not exist", o_inst, node)
1450

    
1451
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1452
    """Verify N+1 Memory Resilience.
1453

1454
    Check that if one single node dies we can still start all the
1455
    instances it was primary for.
1456

1457
    """
1458
    for node, n_img in node_image.items():
1459
      # This code checks that every node which is now listed as
1460
      # secondary has enough memory to host all instances it is
1461
      # supposed to should a single other node in the cluster fail.
1462
      # FIXME: not ready for failover to an arbitrary node
1463
      # FIXME: does not support file-backed instances
1464
      # WARNING: we currently take into account down instances as well
1465
      # as up ones, considering that even if they're down someone
1466
      # might want to start them even in the event of a node failure.
1467
      for prinode, instances in n_img.sbp.items():
1468
        needed_mem = 0
1469
        for instance in instances:
1470
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1471
          if bep[constants.BE_AUTO_BALANCE]:
1472
            needed_mem += bep[constants.BE_MEMORY]
1473
        test = n_img.mfree < needed_mem
1474
        self._ErrorIf(test, self.ENODEN1, node,
1475
                      "not enough memory on to accommodate"
1476
                      " failovers should peer node %s fail", prinode)
1477

    
1478
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
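    # node_drbd: minor number -> (instance name, whether the instance is
    # expected to be running); ghost instances are recorded with False so
    # their minors are not additionally reported as unallocated below.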
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.
1688

1689
    """
1690
    self.bad = False
1691
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1692
    verbose = self.op.verbose
1693
    self._feedback_fn = feedback_fn
1694
    feedback_fn("* Verifying global settings")
1695
    for msg in self.cfg.VerifyConfig():
1696
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1697

    
1698
    # Check the cluster certificates
1699
    for cert_filename in constants.ALL_CERT_FILES:
1700
      (errcode, msg) = _VerifyCertificate(cert_filename)
1701
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1702

    
1703
    vg_name = self.cfg.GetVGName()
1704
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1705
    cluster = self.cfg.GetClusterInfo()
1706
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1707
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1708
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1709
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1710
                        for iname in instancelist)
1711
    i_non_redundant = [] # Non redundant instances
1712
    i_non_a_balanced = [] # Non auto-balanced instances
1713
    n_offline = 0 # Count of offline nodes
1714
    n_drained = 0 # Count of nodes being drained
1715
    node_vol_should = {}
1716

    
1717
    # FIXME: verify OS list
1718
    # do local checksums
1719
    master_files = [constants.CLUSTER_CONF_FILE]
1720
    master_node = self.master_node = self.cfg.GetMasterNode()
1721
    master_ip = self.cfg.GetMasterIP()
1722

    
1723
    file_names = ssconf.SimpleStore().GetFileList()
1724
    file_names.extend(constants.ALL_CERT_FILES)
1725
    file_names.extend(master_files)
1726
    if cluster.modify_etc_hosts:
1727
      file_names.append(constants.ETC_HOSTS)
1728

    
1729
    local_checksums = utils.FingerprintFiles(file_names)
1730

    
1731
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1732
    node_verify_param = {
1733
      constants.NV_FILELIST: file_names,
1734
      constants.NV_NODELIST: [node.name for node in nodeinfo
1735
                              if not node.offline],
1736
      constants.NV_HYPERVISOR: hypervisors,
1737
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1738
                                  node.secondary_ip) for node in nodeinfo
1739
                                 if not node.offline],
1740
      constants.NV_INSTANCELIST: hypervisors,
1741
      constants.NV_VERSION: None,
1742
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1743
      constants.NV_NODESETUP: None,
1744
      constants.NV_TIME: None,
1745
      constants.NV_MASTERIP: (master_node, master_ip),
1746
      }
1747

    
1748
    if vg_name is not None:
1749
      node_verify_param[constants.NV_VGLIST] = None
1750
      node_verify_param[constants.NV_LVLIST] = vg_name
1751
      node_verify_param[constants.NV_PVLIST] = [vg_name]
1752
      node_verify_param[constants.NV_DRBDLIST] = None
1753

    
1754
    # Build our expected cluster state
1755
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
1756
                      for node in nodeinfo)
1757

    
1758
    for instance in instancelist:
1759
      inst_config = instanceinfo[instance]
1760

    
1761
      for nname in inst_config.all_nodes:
1762
        if nname not in node_image:
1763
          # ghost node
1764
          gnode = self.NodeImage()
1765
          gnode.ghost = True
1766
          node_image[nname] = gnode
1767

    
1768
      inst_config.MapLVsByNode(node_vol_should)
1769

    
1770
      pnode = inst_config.primary_node
1771
      node_image[pnode].pinst.append(instance)
1772

    
1773
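      # record, for each secondary node, which instances it would have to
      # take over per primary node (used by the N+1 memory check)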
      for snode in inst_config.secondary_nodes:
1774
        nimg = node_image[snode]
1775
        nimg.sinst.append(instance)
1776
        if pnode not in nimg.sbp:
1777
          nimg.sbp[pnode] = []
1778
        nimg.sbp[pnode].append(instance)
1779

    
1780
    # At this point, we have the in-memory data structures complete,
1781
    # except for the runtime information, which we'll gather next
1782

    
1783
    # Due to the way our RPC system works, exact response times cannot be
1784
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1785
    # time before and after executing the request, we can at least have a time
1786
    # window.
1787
    nvinfo_starttime = time.time()
1788
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1789
                                           self.cfg.GetClusterName())
1790
    nvinfo_endtime = time.time()
1791

    
1792
    all_drbd_map = self.cfg.ComputeDRBDMap()
1793

    
1794
    feedback_fn("* Verifying node status")
1795
    for node_i in nodeinfo:
1796
      node = node_i.name
1797
      nimg = node_image[node]
1798

    
1799
      if node_i.offline:
1800
        if verbose:
1801
          feedback_fn("* Skipping offline node %s" % (node,))
1802
        n_offline += 1
1803
        continue
1804

    
1805
      if node == master_node:
1806
        ntype = "master"
1807
      elif node_i.master_candidate:
1808
        ntype = "master candidate"
1809
      elif node_i.drained:
1810
        ntype = "drained"
1811
        n_drained += 1
1812
      else:
1813
        ntype = "regular"
1814
      if verbose:
1815
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1816

    
1817
      msg = all_nvinfo[node].fail_msg
1818
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1819
      if msg:
1820
        nimg.rpc_fail = True
1821
        continue
1822

    
1823
      nresult = all_nvinfo[node].payload
1824

    
1825
      nimg.call_ok = self._VerifyNode(node_i, nresult)
1826
      self._VerifyNodeNetwork(node_i, nresult)
1827
      self._VerifyNodeLVM(node_i, nresult, vg_name)
1828
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1829
                            master_files)
1830
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1831
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1832

    
1833
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1834
      self._UpdateNodeInstances(node_i, nresult, nimg)
1835
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1836

    
1837
    feedback_fn("* Verifying instance status")
1838
    for instance in instancelist:
1839
      if verbose:
1840
        feedback_fn("* Verifying instance %s" % instance)
1841
      inst_config = instanceinfo[instance]
1842
      self._VerifyInstance(instance, inst_config, node_image)
1843
      inst_nodes_offline = []
1844

    
1845
      pnode = inst_config.primary_node
1846
      pnode_img = node_image[pnode]
1847
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1848
               self.ENODERPC, pnode, "instance %s, connection to"
1849
               " primary node failed", instance)
1850

    
1851
      if pnode_img.offline:
1852
        inst_nodes_offline.append(pnode)
1853

    
1854
      # If the instance is non-redundant we cannot survive losing its primary
1855
      # node, so we are not N+1 compliant. On the other hand we have no disk
1856
      # templates with more than one secondary so that situation is not well
1857
      # supported either.
1858
      # FIXME: does not support file-backed instances
1859
      if not inst_config.secondary_nodes:
1860
        i_non_redundant.append(instance)
1861
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1862
               instance, "instance has multiple secondary nodes: %s",
1863
               utils.CommaJoin(inst_config.secondary_nodes),
1864
               code=self.ETYPE_WARNING)
1865

    
1866
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1867
        i_non_a_balanced.append(instance)
1868

    
1869
      for snode in inst_config.secondary_nodes:
1870
        s_img = node_image[snode]
1871
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1872
                 "instance %s, connection to secondary node failed", instance)
1873

    
1874
        if s_img.offline:
1875
          inst_nodes_offline.append(snode)
1876

    
1877
      # warn that the instance lives on offline nodes
1878
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1879
               "instance lives on offline node(s) %s",
1880
               utils.CommaJoin(inst_nodes_offline))
1881
      # ... or ghost nodes
1882
      for node in inst_config.all_nodes:
1883
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1884
                 "instance lives on ghost node %s", node)
1885

    
1886
    feedback_fn("* Verifying orphan volumes")
1887
    self._VerifyOrphanVolumes(node_vol_should, node_image)
1888

    
1889
    feedback_fn("* Verifying orphan instances")
1890
    self._VerifyOrphanInstances(instancelist, node_image)
1891

    
1892
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1893
      feedback_fn("* Verifying N+1 Memory redundancy")
1894
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
1895

    
1896
    feedback_fn("* Other Notes")
1897
    if i_non_redundant:
1898
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1899
                  % len(i_non_redundant))
1900

    
1901
    if i_non_a_balanced:
1902
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1903
                  % len(i_non_a_balanced))
1904

    
1905
    if n_offline:
1906
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
1907

    
1908
    if n_drained:
1909
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
1910

    
1911
    return not self.bad
1912

    
1913
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1914
    """Analyze the post-hooks' result
1915

1916
    This method analyses the hook result, handles it, and sends some
1917
    nicely-formatted feedback back to the user.
1918

1919
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1920
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1921
    @param hooks_results: the results of the multi-node hooks rpc call
1922
    @param feedback_fn: function used send feedback back to the caller
1923
    @param lu_result: previous Exec result
1924
    @return: the new Exec result, based on the previous result
1925
        and hook results
1926

1927
    """
1928
    # We only really run POST phase hooks, and are only interested in
1929
    # their results
1930
    if phase == constants.HOOKS_PHASE_POST:
1931
      # Used to change hooks' output to proper indentation
1932
      indent_re = re.compile('^', re.M)
1933
      feedback_fn("* Hooks Results")
1934
      assert hooks_results, "invalid result from hooks"
1935

    
1936
      for node_name in hooks_results:
1937
        res = hooks_results[node_name]
1938
        msg = res.fail_msg
1939
        test = msg and not res.offline
1940
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
1941
                      "Communication failure in hooks execution: %s", msg)
1942
        if res.offline or msg:
1943
          # No need to investigate payload if node is offline or gave an error.
1944
          # override manually lu_result here as _ErrorIf only
1945
          # overrides self.bad
1946
          lu_result = 1
1947
          continue
1948
        for script, hkr, output in res.payload:
1949
          test = hkr == constants.HKR_FAIL
1950
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
1951
                        "Script %s failed, output:", script)
1952
          if test:
1953
            output = indent_re.sub('      ', output)
1954
            feedback_fn("%s" % output)
1955
            lu_result = 0
1956

    
1957
      return lu_result
1958

    
1959

    
1960
class LUVerifyDisks(NoHooksLU):
1961
  """Verifies the cluster disks status.
1962

1963
  """
1964
  _OP_REQP = []
1965
  REQ_BGL = False
1966

    
1967
  def ExpandNames(self):
1968
    self.needed_locks = {
1969
      locking.LEVEL_NODE: locking.ALL_SET,
1970
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1971
    }
1972
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1973

    
1974
  def CheckPrereq(self):
1975
    """Check prerequisites.
1976

1977
    This has no prerequisites.
1978

1979
    """
1980
    pass
1981

    
1982
  def Exec(self, feedback_fn):
1983
    """Verify integrity of cluster disks.
1984

1985
    @rtype: tuple of three items
1986
    @return: a tuple of (dict of node-to-node_error, list of instances
1987
        which need activate-disks, dict of instance: (node, volume) for
1988
        missing volumes)
1989

1990
    """
1991
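    # res_nodes, res_instances and res_missing alias the three members of
    # the returned tuple; filling them in below builds up the result.
    # A (hypothetical) example of the final shape:
    #   ({"node3": "rpc error"}, ["inst7"], {"inst9": [("node2", "xenvg/disk0")]})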
    result = res_nodes, res_instances, res_missing = {}, [], {}
1992

    
1993
    vg_name = self.cfg.GetVGName()
1994
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1995
    instances = [self.cfg.GetInstanceInfo(name)
1996
                 for name in self.cfg.GetInstanceList()]
1997

    
1998
    nv_dict = {}
1999
    for inst in instances:
2000
      inst_lvs = {}
2001
      if (not inst.admin_up or
2002
          inst.disk_template not in constants.DTS_NET_MIRROR):
2003
        continue
2004
      inst.MapLVsByNode(inst_lvs)
2005
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2006
      for node, vol_list in inst_lvs.iteritems():
2007
        for vol in vol_list:
2008
          nv_dict[(node, vol)] = inst
2009

    
2010
    if not nv_dict:
2011
      return result
2012

    
2013
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2014

    
2015
    for node in nodes:
2016
      # node_volume
2017
      node_res = node_lvs[node]
2018
      if node_res.offline:
2019
        continue
2020
      msg = node_res.fail_msg
2021
      if msg:
2022
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2023
        res_nodes[node] = msg
2024
        continue
2025

    
2026
      lvs = node_res.payload
2027
      for lv_name, (_, _, lv_online) in lvs.items():
2028
        inst = nv_dict.pop((node, lv_name), None)
2029
        if (not lv_online and inst is not None
2030
            and inst.name not in res_instances):
2031
          res_instances.append(inst.name)
2032

    
2033
    # any leftover items in nv_dict are missing LVs, let's arrange the
2034
    # data better
2035
    for key, inst in nv_dict.iteritems():
2036
      if inst.name not in res_missing:
2037
        res_missing[inst.name] = []
2038
      res_missing[inst.name].append(key)
2039

    
2040
    return result
2041

    
2042

    
2043
class LURepairDiskSizes(NoHooksLU):
  """Verifies the sizes of the cluster disks.
2045

2046
  """
2047
  _OP_REQP = ["instances"]
2048
  REQ_BGL = False
2049

    
2050
  def ExpandNames(self):
2051
    if not isinstance(self.op.instances, list):
2052
      raise errors.OpPrereqError("Invalid argument type 'instances'",
2053
                                 errors.ECODE_INVAL)
2054

    
2055
    if self.op.instances:
2056
      self.wanted_names = []
2057
      for name in self.op.instances:
2058
        full_name = _ExpandInstanceName(self.cfg, name)
2059
        self.wanted_names.append(full_name)
2060
      self.needed_locks = {
2061
        locking.LEVEL_NODE: [],
2062
        locking.LEVEL_INSTANCE: self.wanted_names,
2063
        }
2064
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2065
    else:
2066
      self.wanted_names = None
2067
      self.needed_locks = {
2068
        locking.LEVEL_NODE: locking.ALL_SET,
2069
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2070
        }
2071
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2072

    
2073
  def DeclareLocks(self, level):
2074
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2075
      self._LockInstancesNodes(primary_only=True)
2076

    
2077
  def CheckPrereq(self):
2078
    """Check prerequisites.
2079

2080
    This only checks the optional instance list against the existing names.
2081

2082
    """
2083
    if self.wanted_names is None:
2084
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2085

    
2086
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2087
                             in self.wanted_names]
2088

    
2089
  def _EnsureChildSizes(self, disk):
2090
    """Ensure children of the disk have the needed disk size.
2091

2092
    This is valid mainly for DRBD8 and fixes an issue where the
2093
    children have smaller disk size.
2094

2095
    @param disk: an L{ganeti.objects.Disk} object
2096

2097
    """
2098
    if disk.dev_type == constants.LD_DRBD8:
2099
      assert disk.children, "Empty children for DRBD8?"
2100
      fchild = disk.children[0]
2101
      mismatch = fchild.size < disk.size
2102
      if mismatch:
2103
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2104
                     fchild.size, disk.size)
2105
        fchild.size = disk.size
2106

    
2107
      # and we recurse on this child only, not on the metadev
2108
      return self._EnsureChildSizes(fchild) or mismatch
2109
    else:
2110
      return False
2111

    
2112
  def Exec(self, feedback_fn):
2113
    """Verify the size of cluster disks.
2114

2115
    """
2116
    # TODO: check child disks too
2117
    # TODO: check differences in size between primary/secondary nodes
2118
    per_node_disks = {}
2119
    for instance in self.wanted_instances:
2120
      pnode = instance.primary_node
2121
      if pnode not in per_node_disks:
2122
        per_node_disks[pnode] = []
2123
      for idx, disk in enumerate(instance.disks):
2124
        per_node_disks[pnode].append((instance, idx, disk))
2125

    
2126
    changed = []
2127
    for node, dskl in per_node_disks.items():
2128
      newl = [v[2].Copy() for v in dskl]
2129
      for dsk in newl:
2130
        self.cfg.SetDiskID(dsk, node)
2131
      result = self.rpc.call_blockdev_getsizes(node, newl)
2132
      if result.fail_msg:
2133
        self.LogWarning("Failure in blockdev_getsizes call to node"
2134
                        " %s, ignoring", node)
2135
        continue
2136
      if len(result.data) != len(dskl):
2137
        self.LogWarning("Invalid result from node %s, ignoring node results",
2138
                        node)
2139
        continue
2140
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2141
        if size is None:
2142
          self.LogWarning("Disk %d of instance %s did not return size"
2143
                          " information, ignoring", idx, instance.name)
2144
          continue
2145
        if not isinstance(size, (int, long)):
2146
          self.LogWarning("Disk %d of instance %s did not return valid"
2147
                          " size information, ignoring", idx, instance.name)
2148
          continue
2149
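        # sizes are reported in bytes; shift by 20 to get MiB, the unit
        # used for disk.size in the configuration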
        size = size >> 20
2150
        if size != disk.size:
2151
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2152
                       " correcting: recorded %d, actual %d", idx,
2153
                       instance.name, disk.size, size)
2154
          disk.size = size
2155
          self.cfg.Update(instance, feedback_fn)
2156
          changed.append((instance.name, idx, size))
2157
        if self._EnsureChildSizes(disk):
2158
          self.cfg.Update(instance, feedback_fn)
2159
          changed.append((instance.name, idx, disk.size))
2160
    return changed
2161

    
2162

    
2163
class LURenameCluster(LogicalUnit):
2164
  """Rename the cluster.
2165

2166
  """
2167
  HPATH = "cluster-rename"
2168
  HTYPE = constants.HTYPE_CLUSTER
2169
  _OP_REQP = ["name"]
2170

    
2171
  def BuildHooksEnv(self):
2172
    """Build hooks env.
2173

2174
    """
2175
    env = {
2176
      "OP_TARGET": self.cfg.GetClusterName(),
2177
      "NEW_NAME": self.op.name,
2178
      }
2179
    mn = self.cfg.GetMasterNode()
2180
    all_nodes = self.cfg.GetNodeList()
2181
    return env, [mn], all_nodes
2182

    
2183
  def CheckPrereq(self):
2184
    """Verify that the passed name is a valid one.
2185

2186
    """
2187
    hostname = utils.GetHostInfo(self.op.name)
2188

    
2189
    new_name = hostname.name
2190
    self.ip = new_ip = hostname.ip
2191
    old_name = self.cfg.GetClusterName()
2192
    old_ip = self.cfg.GetMasterIP()
2193
    if new_name == old_name and new_ip == old_ip:
2194
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2195
                                 " cluster has changed",
2196
                                 errors.ECODE_INVAL)
2197
    if new_ip != old_ip:
2198
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2199
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2200
                                   " reachable on the network. Aborting." %
2201
                                   new_ip, errors.ECODE_NOTUNIQUE)
2202

    
2203
    self.op.name = new_name
2204

    
2205
  def Exec(self, feedback_fn):
2206
    """Rename the cluster.
2207

2208
    """
2209
    clustername = self.op.name
2210
    ip = self.ip
2211

    
2212
    # shutdown the master IP
2213
    master = self.cfg.GetMasterNode()
2214
    result = self.rpc.call_node_stop_master(master, False)
2215
    result.Raise("Could not disable the master role")
2216

    
2217
    try:
2218
      cluster = self.cfg.GetClusterInfo()
2219
      cluster.cluster_name = clustername
2220
      cluster.master_ip = ip
2221
      self.cfg.Update(cluster, feedback_fn)
2222

    
2223
      # update the known hosts file
2224
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2225
      node_list = self.cfg.GetNodeList()
2226
      try:
2227
        node_list.remove(master)
2228
      except ValueError:
2229
        pass
2230
      result = self.rpc.call_upload_file(node_list,
2231
                                         constants.SSH_KNOWN_HOSTS_FILE)
2232
      for to_node, to_result in result.iteritems():
2233
        msg = to_result.fail_msg
2234
        if msg:
2235
          msg = ("Copy of file %s to node %s failed: %s" %
2236
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2237
          self.proc.LogWarning(msg)
2238

    
2239
    finally:
2240
      result = self.rpc.call_node_start_master(master, False, False)
2241
      msg = result.fail_msg
2242
      if msg:
2243
        self.LogWarning("Could not re-enable the master role on"
2244
                        " the master, please restart manually: %s", msg)
2245

    
2246

    
2247
def _RecursiveCheckIfLVMBased(disk):
2248
  """Check if the given disk or its children are lvm-based.
2249

2250
  @type disk: L{objects.Disk}
2251
  @param disk: the disk to check
2252
  @rtype: boolean
2253
  @return: boolean indicating whether a LD_LV dev_type was found or not
2254

2255
  """
2256
  if disk.children:
2257
    for chdisk in disk.children:
2258
      if _RecursiveCheckIfLVMBased(chdisk):
2259
        return True
2260
  return disk.dev_type == constants.LD_LV
2261

    
2262

    
2263
class LUSetClusterParams(LogicalUnit):
2264
  """Change the parameters of the cluster.
2265

2266
  """
2267
  HPATH = "cluster-modify"
2268
  HTYPE = constants.HTYPE_CLUSTER
2269
  _OP_REQP = []
2270
  REQ_BGL = False
2271

    
2272
  def CheckArguments(self):
2273
    """Check parameters
2274

2275
    """
2276
    for attr in ["candidate_pool_size",
2277
                 "uid_pool", "add_uids", "remove_uids"]:
2278
      if not hasattr(self.op, attr):
2279
        setattr(self.op, attr, None)
2280

    
2281
    if self.op.candidate_pool_size is not None:
2282
      try:
2283
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2284
      except (ValueError, TypeError), err:
2285
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2286
                                   str(err), errors.ECODE_INVAL)
2287
      if self.op.candidate_pool_size < 1:
2288
        raise errors.OpPrereqError("At least one master candidate needed",
2289
                                   errors.ECODE_INVAL)
2290

    
2291
    _CheckBooleanOpField(self.op, "maintain_node_health")
2292

    
2293
    if self.op.uid_pool:
2294
      uidpool.CheckUidPool(self.op.uid_pool)
2295

    
2296
    if self.op.add_uids:
2297
      uidpool.CheckUidPool(self.op.add_uids)
2298

    
2299
    if self.op.remove_uids:
2300
      uidpool.CheckUidPool(self.op.remove_uids)
2301

    
2302
  def ExpandNames(self):
2303
    # FIXME: in the future maybe other cluster params won't require checking on
2304
    # all nodes to be modified.
2305
    self.needed_locks = {
2306
      locking.LEVEL_NODE: locking.ALL_SET,
2307
    }
2308
    self.share_locks[locking.LEVEL_NODE] = 1
2309

    
2310
  def BuildHooksEnv(self):
2311
    """Build hooks env.
2312

2313
    """
2314
    env = {
2315
      "OP_TARGET": self.cfg.GetClusterName(),
2316
      "NEW_VG_NAME": self.op.vg_name,
2317
      }
2318
    mn = self.cfg.GetMasterNode()
2319
    return env, [mn], [mn]
2320

    
2321
  def CheckPrereq(self):
2322
    """Check prerequisites.
2323

2324
    This checks whether the given params don't conflict and
2325
    if the given volume group is valid.
2326

2327
    """
2328
    if self.op.vg_name is not None and not self.op.vg_name:
2329
      instances = self.cfg.GetAllInstancesInfo().values()
2330
      for inst in instances:
2331
        for disk in inst.disks:
2332
          if _RecursiveCheckIfLVMBased(disk):
2333
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2334
                                       " lvm-based instances exist",
2335
                                       errors.ECODE_INVAL)
2336

    
2337
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2338

    
2339
    # if vg_name not None, checks given volume group on all nodes
2340
    if self.op.vg_name:
2341
      vglist = self.rpc.call_vg_list(node_list)
2342
      for node in node_list:
2343
        msg = vglist[node].fail_msg
2344
        if msg:
2345
          # ignoring down node
2346
          self.LogWarning("Error while gathering data on node %s"
2347
                          " (ignoring node): %s", node, msg)
2348
          continue
2349
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2350
                                              self.op.vg_name,
2351
                                              constants.MIN_VG_SIZE)
2352
        if vgstatus:
2353
          raise errors.OpPrereqError("Error on node '%s': %s" %
2354
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2355

    
2356
    self.cluster = cluster = self.cfg.GetClusterInfo()
2357
    # validate params changes
2358
    if self.op.beparams:
2359
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2360
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2361

    
2362
    if self.op.nicparams:
2363
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2364
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2365
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2366
      nic_errors = []
2367

    
2368
      # check all instances for consistency
2369
      for instance in self.cfg.GetAllInstancesInfo().values():
2370
        for nic_idx, nic in enumerate(instance.nics):
2371
          params_copy = copy.deepcopy(nic.nicparams)
2372
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2373

    
2374
          # check parameter syntax
2375
          try:
2376
            objects.NIC.CheckParameterSyntax(params_filled)
2377
          except errors.ConfigurationError, err:
2378
            nic_errors.append("Instance %s, nic/%d: %s" %
2379
                              (instance.name, nic_idx, err))
2380

    
2381
          # if we're moving instances to routed, check that they have an ip
2382
          target_mode = params_filled[constants.NIC_MODE]
2383
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2384
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2385
                              (instance.name, nic_idx))
2386
      if nic_errors:
2387
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2388
                                   "\n".join(nic_errors))
2389

    
2390
    # hypervisor list/parameters
2391
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2392
    if self.op.hvparams:
2393
      if not isinstance(self.op.hvparams, dict):
2394
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2395
                                   errors.ECODE_INVAL)
2396
      for hv_name, hv_dict in self.op.hvparams.items():
2397
        if hv_name not in self.new_hvparams:
2398
          self.new_hvparams[hv_name] = hv_dict
2399
        else:
2400
          self.new_hvparams[hv_name].update(hv_dict)
2401

    
2402
    # os hypervisor parameters
2403
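    # os_hvp holds per-OS hypervisor parameter overrides, e.g. (hypothetical)
    # {"debian-image": {"xen-pvm": {"kernel_path": "/boot/vmlinuz-custom"}}};
    # each OS/hypervisor dict given below is merged on top of the existing
    # values for that OS and hypervisor.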
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2404
    if self.op.os_hvp:
2405
      if not isinstance(self.op.os_hvp, dict):
2406
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2407
                                   errors.ECODE_INVAL)
2408
      for os_name, hvs in self.op.os_hvp.items():
2409
        if not isinstance(hvs, dict):
2410
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2411
                                      " input"), errors.ECODE_INVAL)
2412
        if os_name not in self.new_os_hvp:
2413
          self.new_os_hvp[os_name] = hvs
2414
        else:
2415
          for hv_name, hv_dict in hvs.items():
2416
            if hv_name not in self.new_os_hvp[os_name]:
2417
              self.new_os_hvp[os_name][hv_name] = hv_dict
2418
            else:
2419
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2420

    
2421
    # changes to the hypervisor list
2422
    if self.op.enabled_hypervisors is not None:
2423
      self.hv_list = self.op.enabled_hypervisors
2424
      if not self.hv_list:
2425
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2426
                                   " least one member",
2427
                                   errors.ECODE_INVAL)
2428
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2429
      if invalid_hvs:
2430
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2431
                                   " entries: %s" %
2432
                                   utils.CommaJoin(invalid_hvs),
2433
                                   errors.ECODE_INVAL)
2434
      for hv in self.hv_list:
2435
        # if the hypervisor doesn't already exist in the cluster
2436
        # hvparams, we initialize it to empty, and then (in both
2437
        # cases) we make sure to fill the defaults, as we might not
2438
        # have a complete defaults list if the hypervisor wasn't
2439
        # enabled before
2440
        if hv not in new_hvp:
2441
          new_hvp[hv] = {}
2442
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2443
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2444
    else:
2445
      self.hv_list = cluster.enabled_hypervisors
2446

    
2447
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2448
      # either the enabled list has changed, or the parameters have, validate
2449
      for hv_name, hv_params in self.new_hvparams.items():
2450
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2451
            (self.op.enabled_hypervisors and
2452
             hv_name in self.op.enabled_hypervisors)):
2453
          # either this is a new hypervisor, or its parameters have changed
2454
          hv_class = hypervisor.GetHypervisor(hv_name)
2455
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2456
          hv_class.CheckParameterSyntax(hv_params)
2457
          _CheckHVParams(self, node_list, hv_name, hv_params)
2458

    
2459
    if self.op.os_hvp:
2460
      # no need to check any newly-enabled hypervisors, since the
2461
      # defaults have already been checked in the above code-block
2462
      for os_name, os_hvp in self.new_os_hvp.items():
2463
        for hv_name, hv_params in os_hvp.items():
2464
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2465
          # we need to fill in the new os_hvp on top of the actual hv_p
2466
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2467
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2468
          hv_class = hypervisor.GetHypervisor(hv_name)
2469
          hv_class.CheckParameterSyntax(new_osp)
2470
          _CheckHVParams(self, node_list, hv_name, new_osp)
2471

    
2472

    
2473
  def Exec(self, feedback_fn):
2474
    """Change the parameters of the cluster.
2475

2476
    """
2477
    if self.op.vg_name is not None:
2478
      new_volume = self.op.vg_name
2479
      if not new_volume:
2480
        new_volume = None
2481
      if new_volume != self.cfg.GetVGName():
2482
        self.cfg.SetVGName(new_volume)
2483
      else:
2484
        feedback_fn("Cluster LVM configuration already in desired"
2485
                    " state, not changing")
2486
    if self.op.hvparams:
2487
      self.cluster.hvparams = self.new_hvparams
2488
    if self.op.os_hvp:
2489
      self.cluster.os_hvp = self.new_os_hvp
2490
    if self.op.enabled_hypervisors is not None:
2491
      self.cluster.hvparams = self.new_hvparams
2492
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2493
    if self.op.beparams:
2494
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2495
    if self.op.nicparams:
2496
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2497

    
2498
    if self.op.candidate_pool_size is not None:
2499
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2500
      # we need to update the pool size here, otherwise the save will fail
2501
      _AdjustCandidatePool(self, [])
2502

    
2503
    if self.op.maintain_node_health is not None:
2504
      self.cluster.maintain_node_health = self.op.maintain_node_health
2505

    
2506
    if self.op.add_uids is not None:
2507
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2508

    
2509
    if self.op.remove_uids is not None:
2510
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2511

    
2512
    if self.op.uid_pool is not None:
2513
      self.cluster.uid_pool = self.op.uid_pool
2514

    
2515
    self.cfg.Update(self.cluster, feedback_fn)
2516

    
2517

    
2518
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2519
  """Distribute additional files which are part of the cluster configuration.
2520

2521
  ConfigWriter takes care of distributing the config and ssconf files, but
2522
  there are more files which should be distributed to all nodes. This function
2523
  makes sure those are copied.
2524

2525
  @param lu: calling logical unit
2526
  @param additional_nodes: list of nodes not in the config to distribute to
2527

2528
  """
2529
  # 1. Gather target nodes
2530
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2531
  dist_nodes = lu.cfg.GetOnlineNodeList()
2532
  if additional_nodes is not None:
2533
    dist_nodes.extend(additional_nodes)
2534
  if myself.name in dist_nodes:
2535
    dist_nodes.remove(myself.name)
2536

    
2537
  # 2. Gather files to distribute
2538
  dist_files = set([constants.ETC_HOSTS,
2539
                    constants.SSH_KNOWN_HOSTS_FILE,
2540
                    constants.RAPI_CERT_FILE,
2541
                    constants.RAPI_USERS_FILE,
2542
                    constants.CONFD_HMAC_KEY,
2543
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
2544
                   ])
2545

    
2546
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2547
  for hv_name in enabled_hypervisors:
2548
    hv_class = hypervisor.GetHypervisor(hv_name)
2549
    dist_files.update(hv_class.GetAncillaryFiles())
2550

    
2551
  # 3. Perform the files upload
2552
  for fname in dist_files:
2553
    if os.path.exists(fname):
2554
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2555
      for to_node, to_result in result.items():
2556
        msg = to_result.fail_msg
2557
        if msg:
2558
          msg = ("Copy of file %s to node %s failed: %s" %
2559
                 (fname, to_node, msg))
2560
          lu.proc.LogWarning(msg)
2561

    
2562

    
2563
class LURedistributeConfig(NoHooksLU):
2564
  """Force the redistribution of cluster configuration.
2565

2566
  This is a very simple LU.
2567

2568
  """
2569
  _OP_REQP = []
2570
  REQ_BGL = False
2571

    
2572
  def ExpandNames(self):
2573
    self.needed_locks = {
2574
      locking.LEVEL_NODE: locking.ALL_SET,
2575
    }
2576
    self.share_locks[locking.LEVEL_NODE] = 1
2577

    
2578
  def CheckPrereq(self):
2579
    """Check prerequisites.
2580

2581
    """
2582

    
2583
  def Exec(self, feedback_fn):
2584
    """Redistribute the configuration.
2585

2586
    """
2587
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2588
    _RedistributeAncillaryFiles(self)
2589

    
2590

    
2591
def _WaitForSync(lu, instance, disks=None, oneshot=False):
2592
  """Sleep and poll for an instance's disk to sync.
2593

2594
  """
2595
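  # nothing to do if the instance has no disks, or if an explicit but
  # empty disk list was requested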
  if not instance.disks or disks is not None and not disks:
2596
    return True
2597

    
2598
  disks = _ExpandCheckDisks(instance, disks)
2599

    
2600
  if not oneshot:
2601
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2602

    
2603
  node = instance.primary_node
2604

    
2605
  for dev in disks:
2606
    lu.cfg.SetDiskID(dev, node)
2607

    
2608
  # TODO: Convert to utils.Retry
2609

    
2610
  retries = 0
2611
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2612
  while True:
2613
    max_time = 0
2614
    done = True
2615
    cumul_degraded = False
2616
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2617
    msg = rstats.fail_msg
2618
    if msg:
2619
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2620
      retries += 1
2621
      if retries >= 10:
2622
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2623
                                 " aborting." % node)
2624
      time.sleep(6)
2625
      continue
2626
    rstats = rstats.payload
2627
    retries = 0
2628
    for i, mstat in enumerate(rstats):
2629
      if mstat is None:
2630
        lu.LogWarning("Can't compute data for node %s/%s",
2631
                           node, disks[i].iv_name)
2632
        continue
2633

    
2634
      cumul_degraded = (cumul_degraded or
2635
                        (mstat.is_degraded and mstat.sync_percent is None))
2636
      if mstat.sync_percent is not None:
2637
        done = False
2638
        if mstat.estimated_time is not None:
2639
          rem_time = ("%s remaining (estimated)" %
2640
                      utils.FormatSeconds(mstat.estimated_time))
2641
          max_time = mstat.estimated_time
2642
        else:
2643
          rem_time = "no time estimate"
2644
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2645
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
2646

    
2647
    # if we're done but degraded, let's do a few small retries, to
2648
    # make sure we see a stable and not transient situation; therefore
2649
    # we force restart of the loop
2650
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2651
      logging.info("Degraded disks found, %d retries left", degr_retries)
2652
      degr_retries -= 1
2653
      time.sleep(1)
2654
      continue
2655

    
2656
    if done or oneshot:
2657
      break
2658

    
2659
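    # sleep for at most a minute between polls, or less if the largest
    # estimated time remaining across the disks is shorter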
    time.sleep(min(60, max_time))
2660

    
2661
  if done:
2662
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2663
  return not cumul_degraded
2664

    
2665

    
2666
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2667
  """Check that mirrors are not degraded.
2668

2669
  The ldisk parameter, if True, will change the test from the
2670
  is_degraded attribute (which represents overall non-ok status for
2671
  the device(s)) to the ldisk (representing the local storage status).
2672

2673
  """
2674
  lu.cfg.SetDiskID(dev, node)
2675

    
2676
  result = True
2677

    
2678
  if on_primary or dev.AssembleOnSecondary():
2679
    rstats = lu.rpc.call_blockdev_find(node, dev)
2680
    msg = rstats.fail_msg
2681
    if msg:
2682
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2683
      result = False
2684
    elif not rstats.payload:
2685
      lu.LogWarning("Can't find disk on node %s", node)
2686
      result = False
2687
    else:
2688
      if ldisk:
2689
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2690
      else:
2691
        result = result and not rstats.payload.is_degraded
2692

    
2693
  if dev.children:
2694
    for child in dev.children:
2695
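      # note that ldisk is not propagated here: child devices are checked
      # with the default (overall is_degraded) test only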
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2696

    
2697
  return result
2698

    
2699

    
2700
class LUDiagnoseOS(NoHooksLU):
2701
  """Logical unit for OS diagnose/query.
2702

2703
  """
2704
  _OP_REQP = ["output_fields", "names"]
2705
  REQ_BGL = False
2706
  _FIELDS_STATIC = utils.FieldSet()
2707
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2708
  # Fields that need calculation of global os validity
2709
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2710

    
2711
  def ExpandNames(self):
2712
    if self.op.names:
2713
      raise errors.OpPrereqError("Selective OS query not supported",
2714
                                 errors.ECODE_INVAL)
2715

    
2716
    _CheckOutputFields(static=self._FIELDS_STATIC,
2717
                       dynamic=self._FIELDS_DYNAMIC,
2718
                       selected=self.op.output_fields)
2719

    
2720
    # Lock all nodes, in shared mode
2721
    # Temporary removal of locks, should be reverted later
2722
    # TODO: reintroduce locks when they are lighter-weight
2723
    self.needed_locks = {}
2724
    #self.share_locks[locking.LEVEL_NODE] = 1
2725
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2726

    
2727
  def CheckPrereq(self):
2728
    """Check prerequisites.
2729

2730
    """
2731

    
2732
  @staticmethod
2733
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
2735

2736
    @param rlist: a map with node names as keys and OS objects as values
2737

2738
    @rtype: dict
2739
    @return: a dictionary with osnames as keys and as value another map, with
2740
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2741

2742
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2743
                                     (/srv/..., False, "invalid api")],
2744
                           "node2": [(/srv/..., True, "")]}
2745
          }
2746

2747
    """
2748
    all_os = {}
2749
    # we build here the list of nodes that didn't fail the RPC (at RPC
2750
    # level), so that nodes with a non-responding node daemon don't
2751
    # make all OSes invalid
2752
    good_nodes = [node_name for node_name in rlist
2753
                  if not rlist[node_name].fail_msg]
2754
    for node_name, nr in rlist.items():
2755
      if nr.fail_msg or not nr.payload:
2756
        continue
2757
      for name, path, status, diagnose, variants in nr.payload:
2758
        if name not in all_os:
2759
          # build a list of nodes for this os containing empty lists
2760
          # for each node in node_list
2761
          all_os[name] = {}
2762
          for nname in good_nodes:
2763
            all_os[name][nname] = []
2764
        all_os[name][node_name].append((path, status, diagnose, variants))
2765
    return all_os
2766

    
2767
  def Exec(self, feedback_fn):
2768
    """Compute the list of OSes.
2769

2770
    """
2771
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2772
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2773
    pol = self._DiagnoseByOS(node_data)
2774
    output = []
2775
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2776
    calc_variants = "variants" in self.op.output_fields
2777

    
2778
    for os_name, os_data in pol.items():
2779
      row = []
2780
      if calc_valid:
2781
        valid = True
2782
        variants = None
2783
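        # an OS counts as valid only if its first entry is valid on every
        # node; the reported variants are the intersection across all nodes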
        for osl in os_data.values():
2784
          valid = bool(valid and osl and osl[0][1])
2785
          if not valid:
2786
            variants = set()
2787
            break
2788
          if calc_variants:
2789
            node_variants = osl[0][3]
2790
            if variants is None:
2791
              variants = set(node_variants)
2792
            else:
2793
              variants.intersection_update(node_variants)
2794

    
2795
      for field in self.op.output_fields:
2796
        if field == "name":
2797
          val = os_name
2798
        elif field == "valid":
2799
          val = valid
2800
        elif field == "node_status":
2801
          # this is just a copy of the dict
2802
          val = {}
2803
          for node_name, nos_list in os_data.items():
2804
            val[node_name] = nos_list
2805
        elif field == "variants":
2806
          val = list(variants)
2807
        else:
2808
          raise errors.ParameterError(field)
2809
        row.append(val)
2810
      output.append(row)
2811

    
2812
    return output
2813

    
2814

    
2815
class LURemoveNode(LogicalUnit):
2816
  """Logical unit for removing a node.
2817

2818
  """
2819
  HPATH = "node-remove"
2820
  HTYPE = constants.HTYPE_NODE
2821
  _OP_REQP = ["node_name"]
2822

    
2823
  def BuildHooksEnv(self):
2824
    """Build hooks env.
2825

2826
    This doesn't run on the target node in the pre phase as a failed
2827
    node would then be impossible to remove.
2828

2829
    """
2830
    env = {
2831
      "OP_TARGET": self.op.node_name,
2832
      "NODE_NAME": self.op.node_name,
2833
      }
2834
    all_nodes = self.cfg.GetNodeList()
2835
    try:
2836
      all_nodes.remove(self.op.node_name)
2837
    except ValueError:
2838
      logging.warning("Node %s which is about to be removed not found"
2839
                      " in the all nodes list", self.op.node_name)
2840
    return env, all_nodes, all_nodes
2841

    
2842
  def CheckPrereq(self):
2843
    """Check prerequisites.
2844

2845
    This checks:
2846
     - the node exists in the configuration
2847
     - it does not have primary or secondary instances
2848
     - it's not the master
2849

2850
    Any errors are signaled by raising errors.OpPrereqError.
2851

2852
    """
2853
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2854
    node = self.cfg.GetNodeInfo(self.op.node_name)
2855
    assert node is not None
2856

    
2857
    instance_list = self.cfg.GetInstanceList()
2858

    
2859
    masternode = self.cfg.GetMasterNode()
2860
    if node.name == masternode:
2861
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to fail it over first.",
2863
                                 errors.ECODE_INVAL)
2864

    
2865
    for instance_name in instance_list:
2866
      instance = self.cfg.GetInstanceInfo(instance_name)
2867
      if node.name in instance.all_nodes:
2868
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove it first." % instance_name,
2870
                                   errors.ECODE_INVAL)
2871
    self.op.node_name = node.name
2872
    self.node = node
2873

    
2874
  def Exec(self, feedback_fn):
2875
    """Removes the node from the cluster.
2876

2877
    """
2878
    node = self.node
2879
    logging.info("Stopping the node daemon and removing configs from node %s",
2880
                 node.name)
2881

    
2882
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2883

    
2884
    # Promote nodes to master candidate as needed
2885
    _AdjustCandidatePool(self, exceptions=[node.name])
2886
    self.context.RemoveNode(node.name)
2887

    
2888
    # Run post hooks on the node before it's removed
2889
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2890
    try:
2891
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2892
    except:
2893
      # pylint: disable-msg=W0702
2894
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2895

    
2896
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2897
    msg = result.fail_msg
2898
    if msg:
2899
      self.LogWarning("Errors encountered on the remote node while leaving"
2900
                      " the cluster: %s", msg)
2901

    
2902
    # Remove node from our /etc/hosts
2903
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2904
      # FIXME: this should be done via an rpc call to node daemon
2905
      utils.RemoveHostFromEtcHosts(node.name)
2906
      _RedistributeAncillaryFiles(self)
2907

    
2908

    
2909
class LUQueryNodes(NoHooksLU):
2910
  """Logical unit for querying nodes.
2911

2912
  """
2913
  # pylint: disable-msg=W0142
2914
  _OP_REQP = ["output_fields", "names", "use_locking"]
2915
  REQ_BGL = False
2916

    
2917
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2918
                    "master_candidate", "offline", "drained"]
2919

    
2920
  _FIELDS_DYNAMIC = utils.FieldSet(
2921
    "dtotal", "dfree",
2922
    "mtotal", "mnode", "mfree",
2923
    "bootid",
2924
    "ctotal", "cnodes", "csockets",
2925
    )
2926

    
2927
  _FIELDS_STATIC = utils.FieldSet(*[
2928
    "pinst_cnt", "sinst_cnt",
2929
    "pinst_list", "sinst_list",
2930
    "pip", "sip", "tags",
2931
    "master",
2932
    "role"] + _SIMPLE_FIELDS
2933
    )
2934

    
2935
  def ExpandNames(self):
2936
    _CheckOutputFields(static=self._FIELDS_STATIC,
2937
                       dynamic=self._FIELDS_DYNAMIC,
2938
                       selected=self.op.output_fields)
2939

    
2940
    self.needed_locks = {}
2941
    self.share_locks[locking.LEVEL_NODE] = 1
2942

    
2943
    if self.op.names:
2944
      self.wanted = _GetWantedNodes(self, self.op.names)
2945
    else:
2946
      self.wanted = locking.ALL_SET
2947

    
2948
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2949
    self.do_locking = self.do_node_query and self.op.use_locking
2950
    if self.do_locking:
2951
      # if we don't request only static fields, we need to lock the nodes
2952
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2953

    
2954
  def CheckPrereq(self):
2955
    """Check prerequisites.
2956

2957
    """
2958
    # The validation of the node list is done in _GetWantedNodes, if the
2959
    # list is non-empty; if it is empty, there is no validation to do
2960
    pass
2961

    
2962
  def Exec(self, feedback_fn):
2963
    """Computes the list of nodes and their attributes.
2964

2965
    """
2966
    all_info = self.cfg.GetAllNodesInfo()
2967
    if self.do_locking:
2968
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2969
    elif self.wanted != locking.ALL_SET:
2970
      nodenames = self.wanted
2971
      missing = set(nodenames).difference(all_info.keys())
2972
      if missing:
2973
        raise errors.OpExecError(
2974
          "Some nodes were removed before retrieving their data: %s" % missing)
2975
    else:
2976
      nodenames = all_info.keys()
2977

    
2978
    nodenames = utils.NiceSort(nodenames)
2979
    nodelist = [all_info[name] for name in nodenames]
2980

    
2981
    # begin data gathering
2982

    
2983
    if self.do_node_query:
2984
      live_data = {}
2985
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2986
                                          self.cfg.GetHypervisorType())
2987
      for name in nodenames:
2988
        nodeinfo = node_data[name]
2989
        if not nodeinfo.fail_msg and nodeinfo.payload:
2990
          nodeinfo = nodeinfo.payload
2991
          fn = utils.TryConvert
2992
          live_data[name] = {
2993
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2994
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2995
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2996
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2997
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2998
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2999
            "bootid": nodeinfo.get('bootid', None),
3000
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3001
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3002
            }
3003
        else:
3004
          live_data[name] = {}
3005
    else:
3006
      live_data = dict.fromkeys(nodenames, {})
3007

    
3008
    node_to_primary = dict([(name, set()) for name in nodenames])
3009
    node_to_secondary = dict([(name, set()) for name in nodenames])
3010

    
3011
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3012
                             "sinst_cnt", "sinst_list"))
3013
    if inst_fields & frozenset(self.op.output_fields):
3014
      inst_data = self.cfg.GetAllInstancesInfo()
3015

    
3016
      for inst in inst_data.values():
3017
        if inst.primary_node in node_to_primary:
3018
          node_to_primary[inst.primary_node].add(inst.name)
3019
        for secnode in inst.secondary_nodes:
3020
          if secnode in node_to_secondary:
3021
            node_to_secondary[secnode].add(inst.name)
3022

    
3023
    master_node = self.cfg.GetMasterNode()
3024

    
3025
    # end data gathering
3026

    
3027
    output = []
3028
    for node in nodelist:
3029
      node_output = []
3030
      for field in self.op.output_fields:
3031
        if field in self._SIMPLE_FIELDS:
3032
          val = getattr(node, field)
3033
        elif field == "pinst_list":
3034
          val = list(node_to_primary[node.name])
3035
        elif field == "sinst_list":
3036
          val = list(node_to_secondary[node.name])
3037
        elif field == "pinst_cnt":
3038
          val = len(node_to_primary[node.name])
3039
        elif field == "sinst_cnt":
3040
          val = len(node_to_secondary[node.name])
3041
        elif field == "pip":
3042
          val = node.primary_ip
3043
        elif field == "sip":
3044
          val = node.secondary_ip
3045
        elif field == "tags":
3046
          val = list(node.GetTags())
3047
        elif field == "master":
3048
          val = node.name == master_node
3049
        elif self._FIELDS_DYNAMIC.Matches(field):
3050
          val = live_data[node.name].get(field, None)
3051
        elif field == "role":
3052
          if node.name == master_node:
3053
            val = "M"
3054
          elif node.master_candidate:
3055
            val = "C"
3056
          elif node.drained:
3057
            val = "D"
3058
          elif node.offline:
3059
            val = "O"
3060
          else:
3061
            val = "R"
3062
        else:
3063
          raise errors.ParameterError(field)
3064
        node_output.append(val)
3065
      output.append(node_output)
3066

    
3067
    return output
3068
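# Illustrative sketch, not part of the original LU: the "role" field computed
# above uses a fixed precedence -- master, then master candidate, drained,
# offline, and finally regular.
def _ExampleNodeRole(node, master_node):
  """Sketch only: return the single-letter role used by LUQueryNodes."""
  if node.name == master_node:
    return "M"
  elif node.master_candidate:
    return "C"
  elif node.drained:
    return "D"
  elif node.offline:
    return "O"
  else:
    return "R"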

    
3069

    
3070
class LUQueryNodeVolumes(NoHooksLU):
3071
  """Logical unit for getting volumes on node(s).
3072

3073
  """
3074
  _OP_REQP = ["nodes", "output_fields"]
3075
  REQ_BGL = False
3076
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3077
  _FIELDS_STATIC = utils.FieldSet("node")
3078

    
3079
  def ExpandNames(self):
3080
    _CheckOutputFields(static=self._FIELDS_STATIC,
3081
                       dynamic=self._FIELDS_DYNAMIC,
3082
                       selected=self.op.output_fields)
3083

    
3084
    self.needed_locks = {}
3085
    self.share_locks[locking.LEVEL_NODE] = 1
3086
    if not self.op.nodes:
3087
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3088
    else:
3089
      self.needed_locks[locking.LEVEL_NODE] = \
3090
        _GetWantedNodes(self, self.op.nodes)
3091

    
3092
  def CheckPrereq(self):
3093
    """Check prerequisites.
3094

3095
    This records the list of nodes to query from the acquired node locks.
3096

3097
    """
3098
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3099

    
3100
  def Exec(self, feedback_fn):
3101
    """Computes the list of nodes and their attributes.
3102

3103
    """
3104
    nodenames = self.nodes
3105
    volumes = self.rpc.call_node_volumes(nodenames)
3106

    
3107
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3108
             in self.cfg.GetInstanceList()]
3109

    
3110
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3111

    
3112
    output = []
3113
    for node in nodenames:
3114
      nresult = volumes[node]
3115
      if nresult.offline:
3116
        continue
3117
      msg = nresult.fail_msg
3118
      if msg:
3119
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3120
        continue
3121

    
3122
      node_vols = nresult.payload[:]
3123
      node_vols.sort(key=lambda vol: vol['dev'])
3124

    
3125
      for vol in node_vols:
3126
        node_output = []
3127
        for field in self.op.output_fields:
3128
          if field == "node":
3129
            val = node
3130
          elif field == "phys":
3131
            val = vol['dev']
3132
          elif field == "vg":
3133
            val = vol['vg']
3134
          elif field == "name":
3135
            val = vol['name']
3136
          elif field == "size":
3137
            val = int(float(vol['size']))
3138
          elif field == "instance":
3139
            for inst in ilist:
3140
              if node not in lv_by_node[inst]:
3141
                continue
3142
              if vol['name'] in lv_by_node[inst][node]:
3143
                val = inst.name
3144
                break
3145
            else:
3146
              val = '-'
3147
          else:
3148
            raise errors.ParameterError(field)
3149
          node_output.append(str(val))
3150

    
3151
        output.append(node_output)
3152

    
3153
    return output
3154
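# Illustrative sketch, not part of the original LU: resolving which instance
# owns a given logical volume, equivalent to the inner loop for the
# "instance" field above; lv_by_node is the MapLVsByNode() mapping built
# in Exec.
def _ExampleVolumeOwner(vol_name, node, instances, lv_by_node):
  """Sketch only: return the owning instance name or '-'."""
  for inst in instances:
    if vol_name in lv_by_node[inst].get(node, []):
      return inst.name
  return "-"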

    
3155

    
3156
class LUQueryNodeStorage(NoHooksLU):
3157
  """Logical unit for getting information on storage units on node(s).
3158

3159
  """
3160
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
3161
  REQ_BGL = False
3162
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3163

    
3164
  def CheckArguments(self):
3165
    _CheckStorageType(self.op.storage_type)
3166

    
3167
    _CheckOutputFields(static=self._FIELDS_STATIC,
3168
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3169
                       selected=self.op.output_fields)
3170

    
3171
  def ExpandNames(self):
3172
    self.needed_locks = {}
3173
    self.share_locks[locking.LEVEL_NODE] = 1
3174

    
3175
    if self.op.nodes:
3176
      self.needed_locks[locking.LEVEL_NODE] = \
3177
        _GetWantedNodes(self, self.op.nodes)
3178
    else:
3179
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3180

    
3181
  def CheckPrereq(self):
3182
    """Check prerequisites.
3183

3184
    This records the optional name filter and the list of locked nodes.
3185

3186
    """
3187
    self.op.name = getattr(self.op, "name", None)
3188

    
3189
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3190

    
3191
  def Exec(self, feedback_fn):
3192
    """Computes the list of nodes and their attributes.
3193

3194
    """
3195
    # Always get name to sort by
3196
    if constants.SF_NAME in self.op.output_fields:
3197
      fields = self.op.output_fields[:]
3198
    else:
3199
      fields = [constants.SF_NAME] + self.op.output_fields
3200

    
3201
    # Never ask for node or type as it's only known to the LU
3202
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3203
      while extra in fields:
3204
        fields.remove(extra)
3205

    
3206
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3207
    name_idx = field_idx[constants.SF_NAME]
3208

    
3209
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3210
    data = self.rpc.call_storage_list(self.nodes,
3211
                                      self.op.storage_type, st_args,
3212
                                      self.op.name, fields)
3213

    
3214
    result = []
3215

    
3216
    for node in utils.NiceSort(self.nodes):
3217
      nresult = data[node]
3218
      if nresult.offline:
3219
        continue
3220

    
3221
      msg = nresult.fail_msg
3222
      if msg:
3223
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3224
        continue
3225

    
3226
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3227

    
3228
      for name in utils.NiceSort(rows.keys()):
3229
        row = rows[name]
3230

    
3231
        out = []
3232

    
3233
        for field in self.op.output_fields:
3234
          if field == constants.SF_NODE:
3235
            val = node
3236
          elif field == constants.SF_TYPE:
3237
            val = self.op.storage_type
3238
          elif field in field_idx:
3239
            val = row[field_idx[field]]
3240
          else:
3241
            raise errors.ParameterError(field)
3242

    
3243
          out.append(val)
3244

    
3245
        result.append(out)
3246

    
3247
    return result
3248
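# Illustrative sketch, not part of the original LU: how the field list for the
# storage_list RPC is prepared in Exec above -- the name field is always
# requested (it is the sort key), while node and type are stripped because
# they are filled in locally.
def _ExampleStorageRpcFields(output_fields):
  """Sketch only: build the field list actually sent over RPC."""
  fields = list(output_fields)
  if constants.SF_NAME not in fields:
    fields.insert(0, constants.SF_NAME)
  return [field for field in fields
          if field not in (constants.SF_NODE, constants.SF_TYPE)]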

    
3249

    
3250
class LUModifyNodeStorage(NoHooksLU):
3251
  """Logical unit for modifying a storage volume on a node.
3252

3253
  """
3254
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3255
  REQ_BGL = False
3256

    
3257
  def CheckArguments(self):
3258
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3259

    
3260
    _CheckStorageType(self.op.storage_type)
3261

    
3262
  def ExpandNames(self):
3263
    self.needed_locks = {
3264
      locking.LEVEL_NODE: self.op.node_name,
3265
      }
3266

    
3267
  def CheckPrereq(self):
3268
    """Check prerequisites.
3269

3270
    """
3271
    storage_type = self.op.storage_type
3272

    
3273
    try:
3274
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3275
    except KeyError:
3276
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3277
                                 " modified" % storage_type,
3278
                                 errors.ECODE_INVAL)
3279

    
3280
    diff = set(self.op.changes.keys()) - modifiable
3281
    if diff:
3282
      raise errors.OpPrereqError("The following fields can not be modified for"
3283
                                 " storage units of type '%s': %r" %
3284
                                 (storage_type, list(diff)),
3285
                                 errors.ECODE_INVAL)
3286

    
3287
  def Exec(self, feedback_fn):
3288
    """Computes the list of nodes and their attributes.
3289

3290
    """
3291
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3292
    result = self.rpc.call_storage_modify(self.op.node_name,
3293
                                          self.op.storage_type, st_args,
3294
                                          self.op.name, self.op.changes)
3295
    result.Raise("Failed to modify storage unit '%s' on %s" %
3296
                 (self.op.name, self.op.node_name))
3297
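# Illustrative sketch, not part of the original LU: the check performed in
# CheckPrereq above -- only the keys listed in MODIFIABLE_STORAGE_FIELDS for
# the given storage type may appear in the "changes" dictionary.
def _ExampleValidateStorageChanges(storage_type, changes):
  """Sketch only: reject changes to fields that cannot be modified."""
  modifiable = constants.MODIFIABLE_STORAGE_FIELDS.get(storage_type,
                                                       frozenset())
  unknown = set(changes.keys()) - modifiable
  if unknown:
    raise errors.OpPrereqError("Fields %r cannot be modified for storage"
                               " type '%s'" % (list(unknown), storage_type),
                               errors.ECODE_INVAL)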

    
3298

    
3299
class LUAddNode(LogicalUnit):
3300
  """Logical unit for adding node to the cluster.
3301

3302
  """
3303
  HPATH = "node-add"
3304
  HTYPE = constants.HTYPE_NODE
3305
  _OP_REQP = ["node_name"]
3306

    
3307
  def CheckArguments(self):
3308
    # validate/normalize the node name
3309
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3310

    
3311
  def BuildHooksEnv(self):
3312
    """Build hooks env.
3313

3314
    This will run on all nodes before, and on all nodes + the new node after.
3315

3316
    """
3317
    env = {
3318
      "OP_TARGET": self.op.node_name,
3319
      "NODE_NAME": self.op.node_name,
3320
      "NODE_PIP": self.op.primary_ip,
3321
      "NODE_SIP": self.op.secondary_ip,
3322
      }
3323
    nodes_0 = self.cfg.GetNodeList()
3324
    nodes_1 = nodes_0 + [self.op.node_name, ]
3325
    return env, nodes_0, nodes_1
3326

    
3327
  def CheckPrereq(self):
3328
    """Check prerequisites.
3329

3330
    This checks:
3331
     - the new node is not already in the config
3332
     - it is resolvable
3333
     - its parameters (single/dual homed) matches the cluster
3334

3335
    Any errors are signaled by raising errors.OpPrereqError.
3336

3337
    """
3338
    node_name = self.op.node_name
3339
    cfg = self.cfg
3340

    
3341
    dns_data = utils.GetHostInfo(node_name)
3342

    
3343
    node = dns_data.name
3344
    primary_ip = self.op.primary_ip = dns_data.ip
3345
    secondary_ip = getattr(self.op, "secondary_ip", None)
3346
    if secondary_ip is None:
3347
      secondary_ip = primary_ip
3348
    if not utils.IsValidIP(secondary_ip):
3349
      raise errors.OpPrereqError("Invalid secondary IP given",
3350
                                 errors.ECODE_INVAL)
3351
    self.op.secondary_ip = secondary_ip
3352

    
3353
    node_list = cfg.GetNodeList()
3354
    if not self.op.readd and node in node_list:
3355
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3356
                                 node, errors.ECODE_EXISTS)
3357
    elif self.op.readd and node not in node_list:
3358
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3359
                                 errors.ECODE_NOENT)
3360

    
3361
    self.changed_primary_ip = False
3362

    
3363
    for existing_node_name in node_list:
3364
      existing_node = cfg.GetNodeInfo(existing_node_name)
3365

    
3366
      if self.op.readd and node == existing_node_name:
3367
        if existing_node.secondary_ip != secondary_ip:
3368
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3369
                                     " address configuration as before",
3370
                                     errors.ECODE_INVAL)
3371
        if existing_node.primary_ip != primary_ip:
3372
          self.changed_primary_ip = True
3373

    
3374
        continue
3375

    
3376
      if (existing_node.primary_ip == primary_ip or
3377
          existing_node.secondary_ip == primary_ip or
3378
          existing_node.primary_ip == secondary_ip or
3379
          existing_node.secondary_ip == secondary_ip):
3380
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3381
                                   " existing node %s" % existing_node.name,
3382
                                   errors.ECODE_NOTUNIQUE)
3383

    
3384
    # check that the type of the node (single versus dual homed) is the
3385
    # same as for the master
3386
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3387
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3388
    newbie_singlehomed = secondary_ip == primary_ip
3389
    if master_singlehomed != newbie_singlehomed:
3390
      if master_singlehomed:
3391
        raise errors.OpPrereqError("The master has no private ip but the"
3392
                                   " new node has one",
3393
                                   errors.ECODE_INVAL)
3394
      else:
3395
        raise errors.OpPrereqError("The master has a private ip but the"
3396
                                   " new node doesn't have one",
3397
                                   errors.ECODE_INVAL)
3398

    
3399
    # checks reachability
3400
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3401
      raise errors.OpPrereqError("Node not reachable by ping",
3402
                                 errors.ECODE_ENVIRON)
3403

    
3404
    if not newbie_singlehomed:
3405
      # check reachability from my secondary ip to newbie's secondary ip
3406
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3407
                           source=myself.secondary_ip):
3408
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3409
                                   " based ping to noded port",
3410
                                   errors.ECODE_ENVIRON)
3411

    
3412
    if self.op.readd:
3413
      exceptions = [node]
3414
    else:
3415
      exceptions = []
3416

    
3417
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3418

    
3419
    if self.op.readd:
3420
      self.new_node = self.cfg.GetNodeInfo(node)
3421
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3422
    else:
3423
      self.new_node = objects.Node(name=node,
3424
                                   primary_ip=primary_ip,
3425
                                   secondary_ip=secondary_ip,
3426
                                   master_candidate=self.master_candidate,
3427
                                   offline=False, drained=False)
3428

    
3429
  def Exec(self, feedback_fn):
3430
    """Adds the new node to the cluster.
3431

3432
    """
3433
    new_node = self.new_node
3434
    node = new_node.name
3435

    
3436
    # for re-adds, reset the offline/drained/master-candidate flags;
3437
    # we need to reset here, otherwise offline would prevent RPC calls
3438
    # later in the procedure; this also means that if the re-add
3439
    # fails, we are left with a non-offlined, broken node
3440
    if self.op.readd:
3441
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3442
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3443
      # if we demote the node, we do cleanup later in the procedure
3444
      new_node.master_candidate = self.master_candidate
3445
      if self.changed_primary_ip:
3446
        new_node.primary_ip = self.op.primary_ip
3447

    
3448
    # notify the user about any possible mc promotion
3449
    if new_node.master_candidate:
3450
      self.LogInfo("Node will be a master candidate")
3451

    
3452
    # check connectivity
3453
    result = self.rpc.call_version([node])[node]
3454
    result.Raise("Can't get version information from node %s" % node)
3455
    if constants.PROTOCOL_VERSION == result.payload:
3456
      logging.info("Communication to node %s fine, sw version %s match",
3457
                   node, result.payload)
3458
    else:
3459
      raise errors.OpExecError("Version mismatch master version %s,"
3460
                               " node version %s" %
3461
                               (constants.PROTOCOL_VERSION, result.payload))
3462

    
3463
    # setup ssh on node
3464
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3465
      logging.info("Copy ssh key to node %s", node)
3466
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3467
      keyarray = []
3468
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3469
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3470
                  priv_key, pub_key]
3471

    
3472
      for i in keyfiles:
3473
        keyarray.append(utils.ReadFile(i))
3474

    
3475
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3476
                                      keyarray[2], keyarray[3], keyarray[4],
3477
                                      keyarray[5])
3478
      result.Raise("Cannot transfer ssh keys to the new node")
3479

    
3480
    # Add node to our /etc/hosts, and add key to known_hosts
3481
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3482
      # FIXME: this should be done via an rpc call to node daemon
3483
      utils.AddHostToEtcHosts(new_node.name)
3484

    
3485
    if new_node.secondary_ip != new_node.primary_ip:
3486
      result = self.rpc.call_node_has_ip_address(new_node.name,
3487
                                                 new_node.secondary_ip)
3488
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3489
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3490
      if not result.payload:
3491
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3492
                                 " you gave (%s). Please fix and re-run this"
3493
                                 " command." % new_node.secondary_ip)
3494

    
3495
    node_verify_list = [self.cfg.GetMasterNode()]
3496
    node_verify_param = {
3497
      constants.NV_NODELIST: [node],
3498
      # TODO: do a node-net-test as well?
3499
    }
3500

    
3501
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3502
                                       self.cfg.GetClusterName())
3503
    for verifier in node_verify_list:
3504
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3505
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3506
      if nl_payload:
3507
        for failed in nl_payload:
3508
          feedback_fn("ssh/hostname verification failed"
3509
                      " (checking from %s): %s" %
3510
                      (verifier, nl_payload[failed]))
3511
        raise errors.OpExecError("ssh/hostname verification failed.")
3512

    
3513
    if self.op.readd:
3514
      _RedistributeAncillaryFiles(self)
3515
      self.context.ReaddNode(new_node)
3516
      # make sure we redistribute the config
3517
      self.cfg.Update(new_node, feedback_fn)
3518
      # and make sure the new node will not have old files around
3519
      if not new_node.master_candidate:
3520
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3521
        msg = result.fail_msg
3522
        if msg:
3523
          self.LogWarning("Node failed to demote itself from master"
3524
                          " candidate status: %s" % msg)
3525
    else:
3526
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3527
      self.context.AddNode(new_node, self.proc.GetECId())
3528
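# Illustrative sketch, not part of the original LU: the single- vs dual-homed
# compatibility rule enforced in CheckPrereq above -- the new node and the
# master must either both use a separate secondary IP or both use none.
def _ExampleHomingCompatible(master_primary_ip, master_secondary_ip,
                             new_primary_ip, new_secondary_ip):
  """Sketch only: True if the new node's homing matches the master's."""
  master_singlehomed = master_secondary_ip == master_primary_ip
  newbie_singlehomed = new_secondary_ip == new_primary_ip
  return master_singlehomed == newbie_singlehomed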

    
3529

    
3530
class LUSetNodeParams(LogicalUnit):
3531
  """Modifies the parameters of a node.
3532

3533
  """
3534
  HPATH = "node-modify"
3535
  HTYPE = constants.HTYPE_NODE
3536
  _OP_REQP = ["node_name"]
3537
  REQ_BGL = False
3538

    
3539
  def CheckArguments(self):
3540
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3541
    _CheckBooleanOpField(self.op, 'master_candidate')
3542
    _CheckBooleanOpField(self.op, 'offline')
3543
    _CheckBooleanOpField(self.op, 'drained')
3544
    _CheckBooleanOpField(self.op, 'auto_promote')
3545
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3546
    if all_mods.count(None) == 3:
3547
      raise errors.OpPrereqError("Please pass at least one modification",
3548
                                 errors.ECODE_INVAL)
3549
    if all_mods.count(True) > 1:
3550
      raise errors.OpPrereqError("Can't set the node into more than one"
3551
                                 " state at the same time",
3552
                                 errors.ECODE_INVAL)
3553

    
3554
    # Boolean value that tells us whether we're offlining or draining the node
3555
    self.offline_or_drain = (self.op.offline == True or
3556
                             self.op.drained == True)
3557
    self.deoffline_or_drain = (self.op.offline == False or
3558
                               self.op.drained == False)
3559
    self.might_demote = (self.op.master_candidate == False or
3560
                         self.offline_or_drain)
3561

    
3562
    self.lock_all = self.op.auto_promote and self.might_demote
3563

    
3564

    
3565
  def ExpandNames(self):
3566
    if self.lock_all:
3567
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3568
    else:
3569
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3570

    
3571
  def BuildHooksEnv(self):
3572
    """Build hooks env.
3573

3574
    This runs on the master node.
3575

3576
    """
3577
    env = {
3578
      "OP_TARGET": self.op.node_name,
3579
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3580
      "OFFLINE": str(self.op.offline),
3581
      "DRAINED": str(self.op.drained),
3582
      }
3583
    nl = [self.cfg.GetMasterNode(),
3584
          self.op.node_name]
3585
    return env, nl, nl
3586

    
3587
  def CheckPrereq(self):
3588
    """Check prerequisites.
3589

3590
    This only checks the instance list against the existing names.
3591

3592
    """
3593
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3594

    
3595
    if (self.op.master_candidate is not None or
3596
        self.op.drained is not None or
3597
        self.op.offline is not None):
3598
      # we can't change the master's node flags
3599
      if self.op.node_name == self.cfg.GetMasterNode():
3600
        raise errors.OpPrereqError("The master role can be changed"
3601
                                   " only via masterfailover",
3602
                                   errors.ECODE_INVAL)
3603

    
3604

    
3605
    if node.master_candidate and self.might_demote and not self.lock_all:
3606
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3607
      # check if after removing the current node, we're missing master
3608
      # candidates
3609
      (mc_remaining, mc_should, _) = \
3610
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3611
      if mc_remaining < mc_should:
3612
        raise errors.OpPrereqError("Not enough master candidates, please"
3613
                                   " pass auto_promote to allow promotion",
3614
                                   errors.ECODE_INVAL)
3615

    
3616
    if (self.op.master_candidate == True and
3617
        ((node.offline and not self.op.offline == False) or
3618
         (node.drained and not self.op.drained == False))):
3619
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3620
                                 " to master_candidate" % node.name,
3621
                                 errors.ECODE_INVAL)
3622

    
3623
    # If we're being deofflined/drained, we'll MC ourself if needed
3624
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3625
        self.op.master_candidate == True and not node.master_candidate):
3626
      self.op.master_candidate = _DecideSelfPromotion(self)
3627
      if self.op.master_candidate:
3628
        self.LogInfo("Autopromoting node to master candidate")
3629

    
3630
    return
3631

    
3632
  def Exec(self, feedback_fn):
3633
    """Modifies a node.
3634

3635
    """
3636
    node = self.node
3637

    
3638
    result = []
3639
    changed_mc = False
3640

    
3641
    if self.op.offline is not None:
3642
      node.offline = self.op.offline
3643
      result.append(("offline", str(self.op.offline)))
3644
      if self.op.offline == True:
3645
        if node.master_candidate:
3646
          node.master_candidate = False
3647
          changed_mc = True
3648
          result.append(("master_candidate", "auto-demotion due to offline"))
3649
        if node.drained:
3650
          node.drained = False
3651
          result.append(("drained", "clear drained status due to offline"))
3652

    
3653
    if self.op.master_candidate is not None:
3654
      node.master_candidate = self.op.master_candidate
3655
      changed_mc = True
3656
      result.append(("master_candidate", str(self.op.master_candidate)))
3657
      if self.op.master_candidate == False:
3658
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3659
        msg = rrc.fail_msg
3660
        if msg:
3661
          self.LogWarning("Node failed to demote itself: %s" % msg)
3662

    
3663
    if self.op.drained is not None:
3664
      node.drained = self.op.drained
3665
      result.append(("drained", str(self.op.drained)))
3666
      if self.op.drained == True:
3667
        if node.master_candidate:
3668
          node.master_candidate = False
3669
          changed_mc = True
3670
          result.append(("master_candidate", "auto-demotion due to drain"))
3671
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3672
          msg = rrc.fail_msg
3673
          if msg:
3674
            self.LogWarning("Node failed to demote itself: %s" % msg)
3675
        if node.offline:
3676
          node.offline = False
3677
          result.append(("offline", "clear offline status due to drain"))
3678

    
3679
    # we locked all nodes, we adjust the CP before updating this node
3680
    if self.lock_all:
3681
      _AdjustCandidatePool(self, [node.name])
3682

    
3683
    # this will trigger configuration file update, if needed
3684
    self.cfg.Update(node, feedback_fn)
3685

    
3686
    # this will trigger job queue propagation or cleanup
3687
    if changed_mc:
3688
      self.context.ReaddNode(node)
3689

    
3690
    return result
3691
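# Illustrative sketch, not part of the original LU: the argument rules applied
# in CheckArguments above -- at least one of offline/drained/master_candidate
# must be given, and at most one of them may be set to True at a time.
def _ExampleCheckNodeFlagArguments(offline, drained, master_candidate):
  """Sketch only: validate the requested node flag changes."""
  mods = [offline, drained, master_candidate]
  if mods.count(None) == 3:
    raise errors.OpPrereqError("Please pass at least one modification",
                               errors.ECODE_INVAL)
  if mods.count(True) > 1:
    raise errors.OpPrereqError("Can't set the node into more than one state"
                               " at the same time", errors.ECODE_INVAL)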

    
3692

    
3693
class LUPowercycleNode(NoHooksLU):
3694
  """Powercycles a node.
3695

3696
  """
3697
  _OP_REQP = ["node_name", "force"]
3698
  REQ_BGL = False
3699

    
3700
  def CheckArguments(self):
3701
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3702
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3703
      raise errors.OpPrereqError("The node is the master and the force"
3704
                                 " parameter was not set",
3705
                                 errors.ECODE_INVAL)
3706

    
3707
  def ExpandNames(self):
3708
    """Locking for PowercycleNode.
3709

3710
    This is a last-resort option and shouldn't block on other
3711
    jobs. Therefore, we grab no locks.
3712

3713
    """
3714
    self.needed_locks = {}
3715

    
3716
  def CheckPrereq(self):
3717
    """Check prerequisites.
3718

3719
    This LU has no prereqs.
3720

3721
    """
3722
    pass
3723

    
3724
  def Exec(self, feedback_fn):
3725
    """Reboots a node.
3726

3727
    """
3728
    result = self.rpc.call_node_powercycle(self.op.node_name,
3729
                                           self.cfg.GetHypervisorType())
3730
    result.Raise("Failed to schedule the reboot")
3731
    return result.payload
3732

    
3733

    
3734
class LUQueryClusterInfo(NoHooksLU):
3735
  """Query cluster configuration.
3736

3737
  """
3738
  _OP_REQP = []
3739
  REQ_BGL = False
3740

    
3741
  def ExpandNames(self):
3742
    self.needed_locks = {}
3743

    
3744
  def CheckPrereq(self):
3745
    """No prerequsites needed for this LU.
3746

3747
    """
3748
    pass
3749

    
3750
  def Exec(self, feedback_fn):
3751
    """Return cluster config.
3752

3753
    """
3754
    cluster = self.cfg.GetClusterInfo()
3755
    os_hvp = {}
3756

    
3757
    # Filter just for enabled hypervisors
3758
    for os_name, hv_dict in cluster.os_hvp.items():
3759
      os_hvp[os_name] = {}
3760
      for hv_name, hv_params in hv_dict.items():
3761
        if hv_name in cluster.enabled_hypervisors:
3762
          os_hvp[os_name][hv_name] = hv_params
3763

    
3764
    result = {
3765
      "software_version": constants.RELEASE_VERSION,
3766
      "protocol_version": constants.PROTOCOL_VERSION,
3767
      "config_version": constants.CONFIG_VERSION,
3768
      "os_api_version": max(constants.OS_API_VERSIONS),
3769
      "export_version": constants.EXPORT_VERSION,
3770
      "architecture": (platform.architecture()[0], platform.machine()),
3771
      "name": cluster.cluster_name,
3772
      "master": cluster.master_node,
3773
      "default_hypervisor": cluster.enabled_hypervisors[0],
3774
      "enabled_hypervisors": cluster.enabled_hypervisors,
3775
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3776
                        for hypervisor_name in cluster.enabled_hypervisors]),
3777
      "os_hvp": os_hvp,
3778
      "beparams": cluster.beparams,
3779
      "nicparams": cluster.nicparams,
3780
      "candidate_pool_size": cluster.candidate_pool_size,
3781
      "master_netdev": cluster.master_netdev,
3782
      "volume_group_name": cluster.volume_group_name,
3783
      "file_storage_dir": cluster.file_storage_dir,
3784
      "maintain_node_health": cluster.maintain_node_health,
3785
      "ctime": cluster.ctime,
3786
      "mtime": cluster.mtime,
3787
      "uuid": cluster.uuid,
3788
      "tags": list(cluster.GetTags()),
3789
      "uid_pool": cluster.uid_pool,
3790
      }
3791

    
3792
    return result
3793
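# Illustrative sketch, not part of the original LU: the per-OS hypervisor
# parameter filtering done in Exec above, written as a standalone helper
# (os_hvp maps OS name -> hypervisor name -> parameter dict).
def _ExampleFilterOsHvp(os_hvp, enabled_hypervisors):
  """Sketch only: keep only parameters of enabled hypervisors."""
  return dict((os_name,
               dict((hv_name, hv_params)
                    for (hv_name, hv_params) in hv_dict.items()
                    if hv_name in enabled_hypervisors))
              for (os_name, hv_dict) in os_hvp.items())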

    
3794

    
3795
class LUQueryConfigValues(NoHooksLU):
3796
  """Return configuration values.
3797

3798
  """
3799
  _OP_REQP = []
3800
  REQ_BGL = False
3801
  _FIELDS_DYNAMIC = utils.FieldSet()
3802
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3803
                                  "watcher_pause")
3804

    
3805
  def ExpandNames(self):
3806
    self.needed_locks = {}
3807

    
3808
    _CheckOutputFields(static=self._FIELDS_STATIC,
3809
                       dynamic=self._FIELDS_DYNAMIC,
3810
                       selected=self.op.output_fields)
3811

    
3812
  def CheckPrereq(self):
3813
    """No prerequisites.
3814

3815
    """
3816
    pass
3817

    
3818
  def Exec(self, feedback_fn):
3819
    """Dump a representation of the cluster config to the standard output.
3820

3821
    """
3822
    values = []
3823
    for field in self.op.output_fields:
3824
      if field == "cluster_name":
3825
        entry = self.cfg.GetClusterName()
3826
      elif field == "master_node":
3827
        entry = self.cfg.GetMasterNode()
3828
      elif field == "drain_flag":
3829
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3830
      elif field == "watcher_pause":
3831
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3832
      else:
3833
        raise errors.ParameterError(field)
3834
      values.append(entry)
3835
    return values
3836

    
3837

    
3838
class LUActivateInstanceDisks(NoHooksLU):
3839
  """Bring up an instance's disks.
3840

3841
  """
3842
  _OP_REQP = ["instance_name"]
3843
  REQ_BGL = False
3844

    
3845
  def ExpandNames(self):
3846
    self._ExpandAndLockInstance()
3847
    self.needed_locks[locking.LEVEL_NODE] = []
3848
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3849

    
3850
  def DeclareLocks(self, level):
3851
    if level == locking.LEVEL_NODE:
3852
      self._LockInstancesNodes()
3853

    
3854
  def CheckPrereq(self):
3855
    """Check prerequisites.
3856

3857
    This checks that the instance is in the cluster.
3858

3859
    """
3860
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3861
    assert self.instance is not None, \
3862
      "Cannot retrieve locked instance %s" % self.op.instance_name
3863
    _CheckNodeOnline(self, self.instance.primary_node)
3864
    if not hasattr(self.op, "ignore_size"):
3865
      self.op.ignore_size = False
3866

    
3867
  def Exec(self, feedback_fn):
3868
    """Activate the disks.
3869

3870
    """
3871
    disks_ok, disks_info = \
3872
              _AssembleInstanceDisks(self, self.instance,
3873
                                     ignore_size=self.op.ignore_size)
3874
    if not disks_ok:
3875
      raise errors.OpExecError("Cannot activate block devices")
3876

    
3877
    return disks_info
3878

    
3879

    
3880
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3881
                           ignore_size=False):
3882
  """Prepare the block devices for an instance.
3883

3884
  This sets up the block devices on all nodes.
3885

3886
  @type lu: L{LogicalUnit}
3887
  @param lu: the logical unit on whose behalf we execute
3888
  @type instance: L{objects.Instance}
3889
  @param instance: the instance for whose disks we assemble
3890
  @type disks: list of L{objects.Disk} or None
3891
  @param disks: which disks to assemble (or all, if None)
3892
  @type ignore_secondaries: boolean
3893
  @param ignore_secondaries: if true, errors on secondary nodes
3894
      won't result in an error return from the function
3895
  @type ignore_size: boolean
3896
  @param ignore_size: if true, the current known size of the disk
3897
      will not be used during the disk activation, useful for cases
3898
      when the size is wrong
3899
  @return: a tuple of (disks_ok, device_info), where device_info is a list of
3900
      (host, instance_visible_name, node_visible_name)
3901
      with the mapping from node devices to instance devices
3902

3903
  """
3904
  device_info = []
3905
  disks_ok = True
3906
  iname = instance.name
3907
  disks = _ExpandCheckDisks(instance, disks)
3908

    
3909
  # With the two passes mechanism we try to reduce the window of
3910
  # opportunity for the race condition of switching DRBD to primary
3911
  # before handshaking occurred, but we do not eliminate it
3912

    
3913
  # The proper fix would be to wait (with some limits) until the
3914
  # connection has been made and drbd transitions from WFConnection
3915
  # into any other network-connected state (Connected, SyncTarget,
3916
  # SyncSource, etc.)
3917

    
3918
  # 1st pass, assemble on all nodes in secondary mode
3919
  for inst_disk in disks:
3920
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3921
      if ignore_size:
3922
        node_disk = node_disk.Copy()
3923
        node_disk.UnsetSize()
3924
      lu.cfg.SetDiskID(node_disk, node)
3925
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3926
      msg = result.fail_msg
3927
      if msg:
3928
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3929
                           " (is_primary=False, pass=1): %s",
3930
                           inst_disk.iv_name, node, msg)
3931
        if not ignore_secondaries:
3932
          disks_ok = False
3933

    
3934
  # FIXME: race condition on drbd migration to primary
3935

    
3936
  # 2nd pass, do only the primary node
3937
  for inst_disk in disks:
3938
    dev_path = None
3939

    
3940
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3941
      if node != instance.primary_node:
3942
        continue
3943
      if ignore_size:
3944
        node_disk = node_disk.Copy()
3945
        node_disk.UnsetSize()
3946
      lu.cfg.SetDiskID(node_disk, node)
3947
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3948
      msg = result.fail_msg
3949
      if msg:
3950
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3951
                           " (is_primary=True, pass=2): %s",
3952
                           inst_disk.iv_name, node, msg)
3953
        disks_ok = False
3954
      else:
3955
        dev_path = result.payload
3956

    
3957
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3958

    
3959
  # leave the disks configured for the primary node
3960
  # this is a workaround that would be fixed better by
3961
  # improving the logical/physical id handling
3962
  for disk in disks:
3963
    lu.cfg.SetDiskID(disk, instance.primary_node)
3964

    
3965
  return disks_ok, device_info
3966
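# Illustrative sketch, not part of the original module: consuming the
# (disks_ok, device_info) pair returned above.  Each device_info entry is a
# (node, iv_name, device_path) tuple; device_path may be None if the second
# (primary-node) assembly pass failed for that disk.
def _ExampleFormatDeviceInfo(disks_ok, device_info):
  """Sketch only: render the assembly result as human-readable lines."""
  if not disks_ok:
    return ["disk activation reported errors"]
  return ["%s: %s -> %s" % (node, iv_name, dev_path)
          for (node, iv_name, dev_path) in device_info]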

    
3967

    
3968
def _StartInstanceDisks(lu, instance, force):
3969
  """Start the disks of an instance.
3970

3971
  """
3972
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3973
                                           ignore_secondaries=force)
3974
  if not disks_ok:
3975
    _ShutdownInstanceDisks(lu, instance)
3976
    if force is not None and not force:
3977
      lu.proc.LogWarning("", hint="If the message above refers to a"
3978
                         " secondary node,"
3979
                         " you can retry the operation using '--force'.")
3980
    raise errors.OpExecError("Disk consistency error")
3981

    
3982

    
3983
class LUDeactivateInstanceDisks(NoHooksLU):
3984
  """Shutdown an instance's disks.
3985

3986
  """
3987
  _OP_REQP = ["instance_name"]
3988
  REQ_BGL = False
3989

    
3990
  def ExpandNames(self):
3991
    self._ExpandAndLockInstance()
3992
    self.needed_locks[locking.LEVEL_NODE] = []
3993
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3994

    
3995
  def DeclareLocks(self, level):
3996
    if level == locking.LEVEL_NODE:
3997
      self._LockInstancesNodes()
3998

    
3999
  def CheckPrereq(self):
4000
    """Check prerequisites.
4001

4002
    This checks that the instance is in the cluster.
4003

4004
    """
4005
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4006
    assert self.instance is not None, \
4007
      "Cannot retrieve locked instance %s" % self.op.instance_name
4008

    
4009
  def Exec(self, feedback_fn):
4010
    """Deactivate the disks
4011

4012
    """
4013
    instance = self.instance
4014
    _SafeShutdownInstanceDisks(self, instance)
4015

    
4016

    
4017
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4018
  """Shutdown block devices of an instance.
4019

4020
  This function checks if an instance is running, before calling
4021
  _ShutdownInstanceDisks.
4022

4023
  """
4024
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4025
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4026

    
4027

    
4028
def _ExpandCheckDisks(instance, disks):
4029
  """Return the instance disks selected by the disks list
4030

4031
  @type disks: list of L{objects.Disk} or None
4032
  @param disks: selected disks
4033
  @rtype: list of L{objects.Disk}
4034
  @return: selected instance disks to act on
4035

4036
  """
4037
  if disks is None:
4038
    return instance.disks
4039
  else:
4040
    if not set(disks).issubset(instance.disks):
4041
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4042
                                   " target instance")
4043
    return disks
4044

    
4045

    
4046
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4047
  """Shutdown block devices of an instance.
4048

4049
  This does the shutdown on all nodes of the instance.
4050

4051
  Errors on the primary node are ignored only if ignore_primary is
4052
  true; errors on any other node always mark the result as failed.
4053

4054
  """
4055
  all_result = True
4056
  disks = _ExpandCheckDisks(instance, disks)
4057

    
4058
  for disk in disks:
4059
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4060
      lu.cfg.SetDiskID(top_disk, node)
4061
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4062
      msg = result.fail_msg
4063
      if msg:
4064
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4065
                      disk.iv_name, node, msg)
4066
        if not ignore_primary or node != instance.primary_node:
4067
          all_result = False
4068
  return all_result
4069
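# Illustrative sketch, not part of the original module: the error-accounting
# rule used above -- a shutdown error marks the overall result as failed
# unless it happened on the primary node and ignore_primary was requested.
def _ExampleShutdownErrorIsFatal(node, primary_node, ignore_primary):
  """Sketch only: whether a per-node shutdown error fails the result."""
  return not ignore_primary or node != primary_node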

    
4070

    
4071
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4072
  """Checks if a node has enough free memory.
4073

4074
  This function checks if a given node has the needed amount of free
4075
  memory. In case the node has less memory or we cannot get the
4076
  information from the node, this function raises an OpPrereqError
4077
  exception.
4078

4079
  @type lu: C{LogicalUnit}
4080
  @param lu: a logical unit from which we get configuration data
4081
  @type node: C{str}
4082
  @param node: the node to check
4083
  @type reason: C{str}
4084
  @param reason: string to use in the error message
4085
  @type requested: C{int}
4086
  @param requested: the amount of memory in MiB to check for
4087
  @type hypervisor_name: C{str}
4088
  @param hypervisor_name: the hypervisor to ask for memory stats
4089
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4090
      we cannot check the node
4091

4092
  """
4093
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4094
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4095
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4096
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4097
  if not isinstance(free_mem, int):
4098
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4099
                               " was '%s'" % (node, free_mem),
4100
                               errors.ECODE_ENVIRON)
4101
  if requested > free_mem:
4102
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4103
                               " needed %s MiB, available %s MiB" %
4104
                               (node, reason, requested, free_mem),
4105
                               errors.ECODE_NORES)
4106

    
4107

    
4108
def _CheckNodesFreeDisk(lu, nodenames, requested):
4109
  """Checks if nodes have enough free disk space in the default VG.
4110

4111
  This function checks if all given nodes have the needed amount of
4112
  free disk. In case any node has less disk or we cannot get the
4113
  information from the node, this function raises an OpPrereqError
4114
  exception.
4115

4116
  @type lu: C{LogicalUnit}
4117
  @param lu: a logical unit from which we get configuration data
4118
  @type nodenames: C{list}
4119
  @param nodenames: the list of node names to check
4120
  @type requested: C{int}
4121
  @param requested: the amount of disk in MiB to check for
4122
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4123
      we cannot check the node
4124

4125
  """
4126
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4127
                                   lu.cfg.GetHypervisorType())
4128
  for node in nodenames:
4129
    info = nodeinfo[node]
4130
    info.Raise("Cannot get current information from node %s" % node,
4131
               prereq=True, ecode=errors.ECODE_ENVIRON)
4132
    vg_free = info.payload.get("vg_free", None)
4133
    if not isinstance(vg_free, int):
4134
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4135
                                 " result was '%s'" % (node, vg_free),
4136
                                 errors.ECODE_ENVIRON)
4137
    if requested > vg_free:
4138
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4139
                                 " required %d MiB, available %d MiB" %
4140
                                 (node, requested, vg_free),
4141
                                 errors.ECODE_NORES)
4142
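# Illustrative sketch, not part of the original module: the pattern shared by
# the two helpers above -- fetch a numeric field from the node_info payload,
# reject non-integer values and compare against the requested amount.
def _ExampleCheckPayloadAmount(payload, key, requested, what):
  """Sketch only: validate a free-resource field from a node_info payload."""
  value = payload.get(key, None)
  if not isinstance(value, int):
    raise errors.OpPrereqError("Can't compute %s, result was '%s'" %
                               (what, value), errors.ECODE_ENVIRON)
  if requested > value:
    raise errors.OpPrereqError("Not enough %s: required %d MiB,"
                               " available %d MiB" % (what, requested, value),
                               errors.ECODE_NORES)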

    
4143

    
4144
class LUStartupInstance(LogicalUnit):
4145
  """Starts an instance.
4146

4147
  """
4148
  HPATH = "instance-start"
4149
  HTYPE = constants.HTYPE_INSTANCE
4150
  _OP_REQP = ["instance_name", "force"]
4151
  REQ_BGL = False
4152

    
4153
  def ExpandNames(self):
4154
    self._ExpandAndLockInstance()
4155

    
4156
  def BuildHooksEnv(self):
4157
    """Build hooks env.
4158

4159
    This runs on master, primary and secondary nodes of the instance.
4160

4161
    """
4162
    env = {
4163
      "FORCE": self.op.force,
4164
      }
4165
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4166
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4167
    return env, nl, nl
4168

    
4169
  def CheckPrereq(self):
4170
    """Check prerequisites.
4171

4172
    This checks that the instance is in the cluster.
4173

4174
    """
4175
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4176
    assert self.instance is not None, \
4177
      "Cannot retrieve locked instance %s" % self.op.instance_name
4178

    
4179
    # extra beparams
4180
    self.beparams = getattr(self.op, "beparams", {})
4181
    if self.beparams:
4182
      if not isinstance(self.beparams, dict):
4183
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4184
                                   " dict" % (type(self.beparams), ),
4185
                                   errors.ECODE_INVAL)
4186
      # fill the beparams dict
4187
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4188
      self.op.beparams = self.beparams
4189

    
4190
    # extra hvparams
4191
    self.hvparams = getattr(self.op, "hvparams", {})
4192
    if self.hvparams:
4193
      if not isinstance(self.hvparams, dict):
4194
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4195
                                   " dict" % (type(self.hvparams), ),
4196
                                   errors.ECODE_INVAL)
4197

    
4198
      # check hypervisor parameter syntax (locally)
4199
      cluster = self.cfg.GetClusterInfo()
4200
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4201
      filled_hvp = cluster.FillHV(instance)
4202
      filled_hvp.update(self.hvparams)
4203
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4204
      hv_type.CheckParameterSyntax(filled_hvp)
4205
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4206
      self.op.hvparams = self.hvparams
4207

    
4208
    _CheckNodeOnline(self, instance.primary_node)
4209

    
4210
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4211
    # check bridges existence
4212
    _CheckInstanceBridgesExist(self, instance)
4213

    
4214
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4215
                                              instance.name,
4216
                                              instance.hypervisor)
4217
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4218
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4219
    if not remote_info.payload: # not running already
4220
      _CheckNodeFreeMemory(self, instance.primary_node,
4221
                           "starting instance %s" % instance.name,
4222
                           bep[constants.BE_MEMORY], instance.hypervisor)
4223

    
4224
  def Exec(self, feedback_fn):
4225
    """Start the instance.
4226

4227
    """
4228
    instance = self.instance
4229
    force = self.op.force
4230

    
4231
    self.cfg.MarkInstanceUp(instance.name)
4232

    
4233
    node_current = instance.primary_node
4234

    
4235
    _StartInstanceDisks(self, instance, force)
4236

    
4237
    result = self.rpc.call_instance_start(node_current, instance,
4238
                                          self.hvparams, self.beparams)
4239
    msg = result.fail_msg
4240
    if msg:
4241
      _ShutdownInstanceDisks(self, instance)
4242
      raise errors.OpExecError("Could not start instance: %s" % msg)
4243
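# Illustrative sketch, not part of the original LU: how the one-shot hvparams
# given at startup time are layered in CheckPrereq above -- the cluster/OS
# filled values come first and the per-startup overrides win on conflict.
def _ExampleMergeStartupHvParams(filled_hvp, startup_overrides):
  """Sketch only: overlay startup-time hypervisor parameter overrides."""
  merged = dict(filled_hvp)
  merged.update(startup_overrides)
  return merged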

    
4244

    
4245
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)


  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
4719
  """Logical unit for querying instances.
4720

4721
  """
4722
  # pylint: disable-msg=W0142
4723
  _OP_REQP = ["output_fields", "names", "use_locking"]
4724
  REQ_BGL = False
4725
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4726
                    "serial_no", "ctime", "mtime", "uuid"]
4727
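  # Note: besides simple field names, the static field set below also accepts
  # parameterized (regular-expression) fields such as "disk.size/0" or
  # "nic.mac/1", which Exec() resolves per disk/NIC index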
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4728
                                    "admin_state",
4729
                                    "disk_template", "ip", "mac", "bridge",
4730
                                    "nic_mode", "nic_link",
4731
                                    "sda_size", "sdb_size", "vcpus", "tags",
4732
                                    "network_port", "beparams",
4733
                                    r"(disk)\.(size)/([0-9]+)",
4734
                                    r"(disk)\.(sizes)", "disk_usage",
4735
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4736
                                    r"(nic)\.(bridge)/([0-9]+)",
4737
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4738
                                    r"(disk|nic)\.(count)",
4739
                                    "hvparams",
4740
                                    ] + _SIMPLE_FIELDS +
4741
                                  ["hv/%s" % name
4742
                                   for name in constants.HVS_PARAMETERS
4743
                                   if name not in constants.HVC_GLOBALS] +
4744
                                  ["be/%s" % name
4745
                                   for name in constants.BES_PARAMETERS])
4746
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4747

    
4748

    
4749
  def ExpandNames(self):
4750
    _CheckOutputFields(static=self._FIELDS_STATIC,
4751
                       dynamic=self._FIELDS_DYNAMIC,
4752
                       selected=self.op.output_fields)
4753

    
4754
    self.needed_locks = {}
4755
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4756
    self.share_locks[locking.LEVEL_NODE] = 1
4757

    
4758
    if self.op.names:
4759
      self.wanted = _GetWantedInstances(self, self.op.names)
4760
    else:
4761
      self.wanted = locking.ALL_SET
4762

    
4763
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4764
    self.do_locking = self.do_node_query and self.op.use_locking
4765
    if self.do_locking:
4766
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4767
      self.needed_locks[locking.LEVEL_NODE] = []
4768
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4769

    
4770
  def DeclareLocks(self, level):
4771
    if level == locking.LEVEL_NODE and self.do_locking:
4772
      self._LockInstancesNodes()
4773

    
4774
  def CheckPrereq(self):
4775
    """Check prerequisites.
4776

4777
    """
4778
    pass
4779

    
4780
  def Exec(self, feedback_fn):
4781
    """Computes the list of nodes and their attributes.
4782

4783
    """
4784
    # pylint: disable-msg=R0912
4785
    # way too many branches here
4786
    all_info = self.cfg.GetAllInstancesInfo()
4787
    if self.wanted == locking.ALL_SET:
4788
      # caller didn't specify instance names, so ordering is not important
4789
      if self.do_locking:
4790
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4791
      else:
4792
        instance_names = all_info.keys()
4793
      instance_names = utils.NiceSort(instance_names)
4794
    else:
4795
      # caller did specify names, so we must keep the ordering
4796
      if self.do_locking:
4797
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4798
      else:
4799
        tgt_set = all_info.keys()
4800
      missing = set(self.wanted).difference(tgt_set)
4801
      if missing:
4802
        raise errors.OpExecError("Some instances were removed before"
4803
                                 " retrieving their data: %s" % missing)
4804
      instance_names = self.wanted
4805

    
4806
    instance_list = [all_info[iname] for iname in instance_names]
4807

    
4808
    # begin data gathering
4809

    
4810
    nodes = frozenset([inst.primary_node for inst in instance_list])
4811
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4812

    
4813
    bad_nodes = []
4814
    off_nodes = []
4815
    if self.do_node_query:
4816
      live_data = {}
4817
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4818
      for name in nodes:
4819
        result = node_data[name]
4820
        if result.offline:
4821
          # offline nodes will be in both lists
4822
          off_nodes.append(name)
4823
        if result.fail_msg:
4824
          bad_nodes.append(name)
4825
        else:
4826
          if result.payload:
4827
            live_data.update(result.payload)
4828
          # else no instance is alive
4829
    else:
4830
      live_data = dict([(name, {}) for name in instance_names])
4831

    
4832
    # end data gathering
4833

    
4834
    HVPREFIX = "hv/"
4835
    BEPREFIX = "be/"
4836
    output = []
4837
    cluster = self.cfg.GetClusterInfo()
4838
    for instance in instance_list:
4839
      iout = []
4840
      i_hv = cluster.FillHV(instance, skip_globals=True)
4841
      i_be = cluster.FillBE(instance)
4842
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
4843
      for field in self.op.output_fields:
4844
        st_match = self._FIELDS_STATIC.Matches(field)
4845
        if field in self._SIMPLE_FIELDS:
4846
          val = getattr(instance, field)
4847
        elif field == "pnode":
4848
          val = instance.primary_node
4849
        elif field == "snodes":
4850
          val = list(instance.secondary_nodes)
4851
        elif field == "admin_state":
4852
          val = instance.admin_up
4853
        elif field == "oper_state":
4854
          if instance.primary_node in bad_nodes:
4855
            val = None
4856
          else:
4857
            val = bool(live_data.get(instance.name))
4858
        elif field == "status":
4859
          if instance.primary_node in off_nodes:
4860
            val = "ERROR_nodeoffline"
4861
          elif instance.primary_node in bad_nodes:
4862
            val = "ERROR_nodedown"
4863
          else:
4864
            running = bool(live_data.get(instance.name))
4865
            if running:
4866
              if instance.admin_up:
4867
                val = "running"
4868
              else:
4869
                val = "ERROR_up"
4870
            else:
4871
              if instance.admin_up:
4872
                val = "ERROR_down"
4873
              else:
4874
                val = "ADMIN_down"
4875
        elif field == "oper_ram":
4876
          if instance.primary_node in bad_nodes:
4877
            val = None
4878
          elif instance.name in live_data:
4879
            val = live_data[instance.name].get("memory", "?")
4880
          else:
4881
            val = "-"
4882
        elif field == "vcpus":
4883
          val = i_be[constants.BE_VCPUS]
4884
        elif field == "disk_template":
4885
          val = instance.disk_template
4886
        elif field == "ip":
4887
          if instance.nics:
4888
            val = instance.nics[0].ip
4889
          else:
4890
            val = None
4891
        elif field == "nic_mode":
4892
          if instance.nics:
4893
            val = i_nicp[0][constants.NIC_MODE]
4894
          else:
4895
            val = None
4896
        elif field == "nic_link":
4897
          if instance.nics:
4898
            val = i_nicp[0][constants.NIC_LINK]
4899
          else:
4900
            val = None
4901
        elif field == "bridge":
4902
          if (instance.nics and
4903
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4904
            val = i_nicp[0][constants.NIC_LINK]
4905
          else:
4906
            val = None
4907
        elif field == "mac":
4908
          if instance.nics:
4909
            val = instance.nics[0].mac
4910
          else:
4911
            val = None
4912
        elif field == "sda_size" or field == "sdb_size":
4913
          idx = ord(field[2]) - ord('a')
4914
          try:
4915
            val = instance.FindDisk(idx).size
4916
          except errors.OpPrereqError:
4917
            val = None
4918
        elif field == "disk_usage": # total disk usage per node
4919
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4920
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4921
        elif field == "tags":
4922
          val = list(instance.GetTags())
4923
        elif field == "hvparams":
4924
          val = i_hv
4925
        elif (field.startswith(HVPREFIX) and
4926
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4927
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4928
          val = i_hv.get(field[len(HVPREFIX):], None)
4929
        elif field == "beparams":
4930
          val = i_be
4931
        elif (field.startswith(BEPREFIX) and
4932
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4933
          val = i_be.get(field[len(BEPREFIX):], None)
4934
        elif st_match and st_match.groups():
4935
          # matches a variable list
4936
          st_groups = st_match.groups()
4937
          if st_groups and st_groups[0] == "disk":
4938
            if st_groups[1] == "count":
4939
              val = len(instance.disks)
4940
            elif st_groups[1] == "sizes":
4941
              val = [disk.size for disk in instance.disks]
4942
            elif st_groups[1] == "size":
4943
              try:
4944
                val = instance.FindDisk(st_groups[2]).size
4945
              except errors.OpPrereqError:
4946
                val = None
4947
            else:
4948
              assert False, "Unhandled disk parameter"
4949
          elif st_groups[0] == "nic":
4950
            if st_groups[1] == "count":
4951
              val = len(instance.nics)
4952
            elif st_groups[1] == "macs":
4953
              val = [nic.mac for nic in instance.nics]
4954
            elif st_groups[1] == "ips":
4955
              val = [nic.ip for nic in instance.nics]
4956
            elif st_groups[1] == "modes":
4957
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4958
            elif st_groups[1] == "links":
4959
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4960
            elif st_groups[1] == "bridges":
4961
              val = []
4962
              for nicp in i_nicp:
4963
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4964
                  val.append(nicp[constants.NIC_LINK])
4965
                else:
4966
                  val.append(None)
4967
            else:
4968
              # index-based item
4969
              nic_idx = int(st_groups[2])
4970
              if nic_idx >= len(instance.nics):
4971
                val = None
4972
              else:
4973
                if st_groups[1] == "mac":
4974
                  val = instance.nics[nic_idx].mac
4975
                elif st_groups[1] == "ip":
4976
                  val = instance.nics[nic_idx].ip
4977
                elif st_groups[1] == "mode":
4978
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4979
                elif st_groups[1] == "link":
4980
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4981
                elif st_groups[1] == "bridge":
4982
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4983
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4984
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4985
                  else:
4986
                    val = None
4987
                else:
4988
                  assert False, "Unhandled NIC parameter"
4989
          else:
4990
            assert False, ("Declared but unhandled variable parameter '%s'" %
4991
                           field)
4992
        else:
4993
          assert False, "Declared but unhandled parameter '%s'" % field
4994
        iout.append(val)
4995
      output.append(iout)
4996

    
4997
    return output
4998

    
4999

    
5000
class LUFailoverInstance(LogicalUnit):
5001
  """Failover an instance.
5002

5003
  """
5004
  HPATH = "instance-failover"
5005
  HTYPE = constants.HTYPE_INSTANCE
5006
  _OP_REQP = ["instance_name", "ignore_consistency"]
5007
  REQ_BGL = False
5008

    
5009
  def CheckArguments(self):
5010
    """Check the arguments.
5011

5012
    """
5013
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5014
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5015

    
5016
  def ExpandNames(self):
5017
    self._ExpandAndLockInstance()
5018
    self.needed_locks[locking.LEVEL_NODE] = []
5019
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5020

    
5021
  def DeclareLocks(self, level):
5022
    if level == locking.LEVEL_NODE:
5023
      self._LockInstancesNodes()
5024

    
5025
  def BuildHooksEnv(self):
5026
    """Build hooks env.
5027

5028
    This runs on master, primary and secondary nodes of the instance.
5029

5030
    """
5031
    instance = self.instance
5032
    source_node = instance.primary_node
5033
    target_node = instance.secondary_nodes[0]
5034
    env = {
5035
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5036
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5037
      "OLD_PRIMARY": source_node,
5038
      "OLD_SECONDARY": target_node,
5039
      "NEW_PRIMARY": target_node,
5040
      "NEW_SECONDARY": source_node,
5041
      }
5042
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5043
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5044
    nl_post = list(nl)
5045
    nl_post.append(source_node)
5046
    return env, nl, nl_post
5047

    
5048
  def CheckPrereq(self):
5049
    """Check prerequisites.
5050

5051
    This checks that the instance is in the cluster.
5052

5053
    """
5054
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5055
    assert self.instance is not None, \
5056
      "Cannot retrieve locked instance %s" % self.op.instance_name
5057

    
5058
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5059
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5060
      raise errors.OpPrereqError("Instance's disk layout is not"
5061
                                 " network mirrored, cannot failover.",
5062
                                 errors.ECODE_STATE)
5063

    
5064
    secondary_nodes = instance.secondary_nodes
5065
    if not secondary_nodes:
5066
      raise errors.ProgrammerError("no secondary node but using "
5067
                                   "a mirrored disk template")
5068

    
5069
    target_node = secondary_nodes[0]
5070
    _CheckNodeOnline(self, target_node)
5071
    _CheckNodeNotDrained(self, target_node)
5072
    if instance.admin_up:
5073
      # check memory requirements on the secondary node
5074
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5075
                           instance.name, bep[constants.BE_MEMORY],
5076
                           instance.hypervisor)
5077
    else:
5078
      self.LogInfo("Not checking memory on the secondary node as"
5079
                   " instance will not be started")
5080

    
5081
    # check bridge existence
5082
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5083

    
5084
  def Exec(self, feedback_fn):
5085
    """Failover an instance.
5086

5087
    The failover is done by shutting it down on its present node and
5088
    starting it on the secondary.
5089

5090
    """
5091
    instance = self.instance
5092

    
5093
    source_node = instance.primary_node
5094
    target_node = instance.secondary_nodes[0]
5095

    
5096
    if instance.admin_up:
5097
      feedback_fn("* checking disk consistency between source and target")
5098
      for dev in instance.disks:
5099
        # for drbd, these are drbd over lvm
5100
        if not _CheckDiskConsistency(self, dev, target_node, False):
5101
          if not self.op.ignore_consistency:
5102
            raise errors.OpExecError("Disk %s is degraded on target node,"
5103
                                     " aborting failover." % dev.iv_name)
5104
    else:
5105
      feedback_fn("* not checking disk consistency as instance is not running")
5106

    
5107
    feedback_fn("* shutting down instance on source node")
5108
    logging.info("Shutting down instance %s on node %s",
5109
                 instance.name, source_node)
5110

    
5111
    result = self.rpc.call_instance_shutdown(source_node, instance,
5112
                                             self.shutdown_timeout)
5113
    msg = result.fail_msg
5114
    if msg:
5115
      if self.op.ignore_consistency:
5116
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5117
                             " Proceeding anyway. Please make sure node"
5118
                             " %s is down. Error details: %s",
5119
                             instance.name, source_node, source_node, msg)
5120
      else:
5121
        raise errors.OpExecError("Could not shutdown instance %s on"
5122
                                 " node %s: %s" %
5123
                                 (instance.name, source_node, msg))
5124

    
5125
    feedback_fn("* deactivating the instance's disks on source node")
5126
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5127
      raise errors.OpExecError("Can't shut down the instance's disks.")
5128

    
5129
    instance.primary_node = target_node
5130
    # distribute new instance config to the other nodes
5131
    self.cfg.Update(instance, feedback_fn)
5132

    
5133
    # Only start the instance if it's marked as up
5134
    if instance.admin_up:
5135
      feedback_fn("* activating the instance's disks on target node")
5136
      logging.info("Starting instance %s on node %s",
5137
                   instance.name, target_node)
5138

    
5139
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5140
                                               ignore_secondaries=True)
5141
      if not disks_ok:
5142
        _ShutdownInstanceDisks(self, instance)
5143
        raise errors.OpExecError("Can't activate the instance's disks")
5144

    
5145
      feedback_fn("* starting the instance on the target node")
5146
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5147
      msg = result.fail_msg
5148
      if msg:
5149
        _ShutdownInstanceDisks(self, instance)
5150
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5151
                                 (instance.name, target_node, msg))
5152

    
5153

    
5154
class LUMigrateInstance(LogicalUnit):
5155
  """Migrate an instance.
5156

5157
  This is migration without shutting down, compared to the failover,
5158
  which is done with shutdown.
5159

5160
  """
5161
  HPATH = "instance-migrate"
5162
  HTYPE = constants.HTYPE_INSTANCE
5163
  _OP_REQP = ["instance_name", "live", "cleanup"]
5164

    
5165
  REQ_BGL = False
5166

    
5167
  def ExpandNames(self):
5168
    self._ExpandAndLockInstance()
5169

    
5170
    self.needed_locks[locking.LEVEL_NODE] = []
5171
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5172

    
5173
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5174
                                       self.op.live, self.op.cleanup)
5175
    self.tasklets = [self._migrater]
5176

    
5177
  def DeclareLocks(self, level):
5178
    if level == locking.LEVEL_NODE:
5179
      self._LockInstancesNodes()
5180

    
5181
  def BuildHooksEnv(self):
5182
    """Build hooks env.
5183

5184
    This runs on master, primary and secondary nodes of the instance.
5185

5186
    """
5187
    instance = self._migrater.instance
5188
    source_node = instance.primary_node
5189
    target_node = instance.secondary_nodes[0]
5190
    env = _BuildInstanceHookEnvByObject(self, instance)
5191
    env["MIGRATE_LIVE"] = self.op.live
5192
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5193
    env.update({
5194
        "OLD_PRIMARY": source_node,
5195
        "OLD_SECONDARY": target_node,
5196
        "NEW_PRIMARY": target_node,
5197
        "NEW_SECONDARY": source_node,
5198
        })
5199
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5200
    nl_post = list(nl)
5201
    nl_post.append(source_node)
5202
    return env, nl, nl_post
5203

    
5204

    
5205
class LUMoveInstance(LogicalUnit):
5206
  """Move an instance by data-copying.
5207

5208
  """
5209
  HPATH = "instance-move"
5210
  HTYPE = constants.HTYPE_INSTANCE
5211
  _OP_REQP = ["instance_name", "target_node"]
5212
  REQ_BGL = False
5213

    
5214
  def CheckArguments(self):
5215
    """Check the arguments.
5216

5217
    """
5218
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5219
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5220

    
5221
  def ExpandNames(self):
5222
    self._ExpandAndLockInstance()
5223
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5224
    self.op.target_node = target_node
5225
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5226
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5227

    
5228
  def DeclareLocks(self, level):
5229
    if level == locking.LEVEL_NODE:
5230
      self._LockInstancesNodes(primary_only=True)
5231

    
5232
  def BuildHooksEnv(self):
5233
    """Build hooks env.
5234

5235
    This runs on master, primary and secondary nodes of the instance.
5236

5237
    """
5238
    env = {
5239
      "TARGET_NODE": self.op.target_node,
5240
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5241
      }
5242
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5243
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5244
                                       self.op.target_node]
5245
    return env, nl, nl
5246

    
5247
  def CheckPrereq(self):
5248
    """Check prerequisites.
5249

5250
    This checks that the instance is in the cluster.
5251

5252
    """
5253
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5254
    assert self.instance is not None, \
5255
      "Cannot retrieve locked instance %s" % self.op.instance_name
5256

    
5257
    node = self.cfg.GetNodeInfo(self.op.target_node)
5258
    assert node is not None, \
5259
      "Cannot retrieve locked node %s" % self.op.target_node
5260

    
5261
    self.target_node = target_node = node.name
5262

    
5263
    if target_node == instance.primary_node:
5264
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5265
                                 (instance.name, target_node),
5266
                                 errors.ECODE_STATE)
5267

    
5268
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5269

    
5270
    for idx, dsk in enumerate(instance.disks):
5271
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5272
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5273
                                   " cannot copy" % idx, errors.ECODE_STATE)
5274

    
5275
    _CheckNodeOnline(self, target_node)
5276
    _CheckNodeNotDrained(self, target_node)
5277

    
5278
    if instance.admin_up:
5279
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5281
                           instance.name, bep[constants.BE_MEMORY],
5282
                           instance.hypervisor)
5283
    else:
5284
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")
5286

    
5287
    # check bridge existence
5288
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5289

    
5290
  def Exec(self, feedback_fn):
5291
    """Move an instance.
5292

5293
    The move is done by shutting it down on its present node, copying
5294
    the data over (slow) and starting it on the new node.
5295

5296
    """
5297
    instance = self.instance
5298

    
5299
    source_node = instance.primary_node
5300
    target_node = self.target_node
5301

    
5302
    self.LogInfo("Shutting down instance %s on source node %s",
5303
                 instance.name, source_node)
5304

    
5305
    result = self.rpc.call_instance_shutdown(source_node, instance,
5306
                                             self.shutdown_timeout)
5307
    msg = result.fail_msg
5308
    if msg:
5309
      if self.op.ignore_consistency:
5310
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5311
                             " Proceeding anyway. Please make sure node"
5312
                             " %s is down. Error details: %s",
5313
                             instance.name, source_node, source_node, msg)
5314
      else:
5315
        raise errors.OpExecError("Could not shutdown instance %s on"
5316
                                 " node %s: %s" %
5317
                                 (instance.name, source_node, msg))
5318

    
5319
    # create the target disks
5320
    try:
5321
      _CreateDisks(self, instance, target_node=target_node)
5322
    except errors.OpExecError:
5323
      self.LogWarning("Device creation failed, reverting...")
5324
      try:
5325
        _RemoveDisks(self, instance, target_node=target_node)
5326
      finally:
5327
        self.cfg.ReleaseDRBDMinors(instance.name)
5328
        raise
5329

    
5330
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5331

    
5332
    errs = []
5333
    # activate, get path, copy the data over
5334
    for idx, disk in enumerate(instance.disks):
5335
      self.LogInfo("Copying data for disk %d", idx)
5336
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5337
                                               instance.name, True)
5338
      if result.fail_msg:
5339
        self.LogWarning("Can't assemble newly created disk %d: %s",
5340
                        idx, result.fail_msg)
5341
        errs.append(result.fail_msg)
5342
        break
5343
      dev_path = result.payload
5344
      result = self.rpc.call_blockdev_export(source_node, disk,
5345
                                             target_node, dev_path,
5346
                                             cluster_name)
5347
      if result.fail_msg:
5348
        self.LogWarning("Can't copy data over for disk %d: %s",
5349
                        idx, result.fail_msg)
5350
        errs.append(result.fail_msg)
5351
        break
5352

    
5353
    if errs:
5354
      self.LogWarning("Some disks failed to copy, aborting")
5355
      try:
5356
        _RemoveDisks(self, instance, target_node=target_node)
5357
      finally:
5358
        self.cfg.ReleaseDRBDMinors(instance.name)
5359
        raise errors.OpExecError("Errors during disk copy: %s" %
5360
                                 (",".join(errs),))
5361

    
5362
    instance.primary_node = target_node
5363
    self.cfg.Update(instance, feedback_fn)
5364

    
5365
    self.LogInfo("Removing the disks on the original node")
5366
    _RemoveDisks(self, instance, target_node=source_node)
5367

    
5368
    # Only start the instance if it's marked as up
5369
    if instance.admin_up:
5370
      self.LogInfo("Starting instance %s on node %s",
5371
                   instance.name, target_node)
5372

    
5373
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5374
                                           ignore_secondaries=True)
5375
      if not disks_ok:
5376
        _ShutdownInstanceDisks(self, instance)
5377
        raise errors.OpExecError("Can't activate the instance's disks")
5378

    
5379
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5380
      msg = result.fail_msg
5381
      if msg:
5382
        _ShutdownInstanceDisks(self, instance)
5383
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5384
                                 (instance.name, target_node, msg))
5385

    
5386

    
5387
class LUMigrateNode(LogicalUnit):
5388
  """Migrate all instances from a node.
5389

5390
  """
5391
  HPATH = "node-migrate"
5392
  HTYPE = constants.HTYPE_NODE
5393
  _OP_REQP = ["node_name", "live"]
5394
  REQ_BGL = False
5395

    
5396
  def ExpandNames(self):
5397
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5398

    
5399
    self.needed_locks = {
5400
      locking.LEVEL_NODE: [self.op.node_name],
5401
      }
5402

    
5403
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5404

    
5405
    # Create tasklets for migrating all instances on this node
5406
    names = []
5407
    tasklets = []
5408

    
5409
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5410
      logging.debug("Migrating instance %s", inst.name)
5411
      names.append(inst.name)
5412

    
5413
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5414

    
5415
    self.tasklets = tasklets
5416

    
5417
    # Declare instance locks
5418
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5419

    
5420
  def DeclareLocks(self, level):
5421
    if level == locking.LEVEL_NODE:
5422
      self._LockInstancesNodes()
5423

    
5424
  def BuildHooksEnv(self):
5425
    """Build hooks env.
5426

5427
    This runs on the master, the primary and all the secondaries.
5428

5429
    """
5430
    env = {
5431
      "NODE_NAME": self.op.node_name,
5432
      }
5433

    
5434
    nl = [self.cfg.GetMasterNode()]
5435

    
5436
    return (env, nl, nl)
5437

    
5438

    
5439
class TLMigrateInstance(Tasklet):
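  """Tasklet for migrating a single instance.

  Used by LUMigrateInstance and LUMigrateNode to perform the actual
  migration, or to clean up after a previously failed migration.

  """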
5440
  def __init__(self, lu, instance_name, live, cleanup):
5441
    """Initializes this class.
5442

5443
    """
5444
    Tasklet.__init__(self, lu)
5445

    
5446
    # Parameters
5447
    self.instance_name = instance_name
5448
    self.live = live
5449
    self.cleanup = cleanup
5450

    
5451
  def CheckPrereq(self):
5452
    """Check prerequisites.
5453

5454
    This checks that the instance is in the cluster.
5455

5456
    """
5457
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5458
    instance = self.cfg.GetInstanceInfo(instance_name)
5459
    assert instance is not None
5460

    
5461
    if instance.disk_template != constants.DT_DRBD8:
5462
      raise errors.OpPrereqError("Instance's disk layout is not"
5463
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5464

    
5465
    secondary_nodes = instance.secondary_nodes
5466
    if not secondary_nodes:
5467
      raise errors.ConfigurationError("No secondary node but using"
5468
                                      " drbd8 disk template")
5469

    
5470
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5471

    
5472
    target_node = secondary_nodes[0]
5473
    # check memory requirements on the secondary node
5474
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5475
                         instance.name, i_be[constants.BE_MEMORY],
5476
                         instance.hypervisor)
5477

    
5478
    # check bridge existence
5479
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5480

    
5481
    if not self.cleanup:
5482
      _CheckNodeNotDrained(self.lu, target_node)
5483
      result = self.rpc.call_instance_migratable(instance.primary_node,
5484
                                                 instance)
5485
      result.Raise("Can't migrate, please use failover",
5486
                   prereq=True, ecode=errors.ECODE_STATE)
5487

    
5488
    self.instance = instance
5489

    
5490
  def _WaitUntilSync(self):
5491
    """Poll with custom rpc for disk sync.
5492

5493
    This uses our own step-based rpc call.
5494

5495
    """
5496
    self.feedback_fn("* wait until resync is done")
5497
    all_done = False
5498
    while not all_done:
5499
      all_done = True
5500
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5501
                                            self.nodes_ip,
5502
                                            self.instance.disks)
5503
      min_percent = 100
5504
      for node, nres in result.items():
5505
        nres.Raise("Cannot resync disks on node %s" % node)
5506
        node_done, node_percent = nres.payload
5507
        all_done = all_done and node_done
5508
        if node_percent is not None:
5509
          min_percent = min(min_percent, node_percent)
5510
      if not all_done:
5511
        if min_percent < 100:
5512
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5513
        time.sleep(2)
5514

    
5515
  def _EnsureSecondary(self, node):
5516
    """Demote a node to secondary.
5517

5518
    """
5519
    self.feedback_fn("* switching node %s to secondary mode" % node)
5520

    
5521
    for dev in self.instance.disks:
5522
      self.cfg.SetDiskID(dev, node)
5523

    
5524
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5525
                                          self.instance.disks)
5526
    result.Raise("Cannot change disk to secondary on node %s" % node)
5527

    
5528
  def _GoStandalone(self):
5529
    """Disconnect from the network.
5530

5531
    """
5532
    self.feedback_fn("* changing into standalone mode")
5533
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5534
                                               self.instance.disks)
5535
    for node, nres in result.items():
5536
      nres.Raise("Cannot disconnect disks node %s" % node)
5537

    
5538
  def _GoReconnect(self, multimaster):
5539
    """Reconnect to the network.
5540

5541
    """
5542
    if multimaster:
5543
      msg = "dual-master"
5544
    else:
5545
      msg = "single-master"
5546
    self.feedback_fn("* changing disks into %s mode" % msg)
5547
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5548
                                           self.instance.disks,
5549
                                           self.instance.name, multimaster)
5550
    for node, nres in result.items():
5551
      nres.Raise("Cannot change disks config on node %s" % node)
5552

    
5553
  def _ExecCleanup(self):
    """Try to clean up after a failed migration.
5555

5556
    The cleanup is done by:
5557
      - check that the instance is running only on one node
5558
        (and update the config if needed)
5559
      - change disks on its secondary node to secondary
5560
      - wait until disks are fully synchronized
5561
      - disconnect from the network
5562
      - change disks into single-master mode
5563
      - wait again until disks are fully synchronized
5564

5565
    """
5566
    instance = self.instance
5567
    target_node = self.target_node
5568
    source_node = self.source_node
5569

    
5570
    # check running on only one node
5571
    self.feedback_fn("* checking where the instance actually runs"
5572
                     " (if this hangs, the hypervisor might be in"
5573
                     " a bad state)")
5574
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5575
    for node, result in ins_l.items():
5576
      result.Raise("Can't contact node %s" % node)
5577

    
5578
    runningon_source = instance.name in ins_l[source_node].payload
5579
    runningon_target = instance.name in ins_l[target_node].payload
5580

    
5581
    if runningon_source and runningon_target:
5582
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5583
                               " or the hypervisor is confused. You will have"
5584
                               " to ensure manually that it runs only on one"
5585
                               " and restart this operation.")
5586

    
5587
    if not (runningon_source or runningon_target):
5588
      raise errors.OpExecError("Instance does not seem to be running at all."
5589
                               " In this case, it's safer to repair by"
5590
                               " running 'gnt-instance stop' to ensure disk"
5591
                               " shutdown, and then restarting it.")
5592

    
5593
    if runningon_target:
5594
      # the migration has actually succeeded, we need to update the config
5595
      self.feedback_fn("* instance running on secondary node (%s),"
5596
                       " updating config" % target_node)
5597
      instance.primary_node = target_node
5598
      self.cfg.Update(instance, self.feedback_fn)
5599
      demoted_node = source_node
5600
    else:
5601
      self.feedback_fn("* instance confirmed to be running on its"
5602
                       " primary node (%s)" % source_node)
5603
      demoted_node = target_node
5604

    
5605
    self._EnsureSecondary(demoted_node)
5606
    try:
5607
      self._WaitUntilSync()
5608
    except errors.OpExecError:
5609
      # we ignore errors here, since if the device is standalone, it
5610
      # won't be able to sync
5611
      pass
5612
    self._GoStandalone()
5613
    self._GoReconnect(False)
5614
    self._WaitUntilSync()
5615

    
5616
    self.feedback_fn("* done")
5617

    
5618
  def _RevertDiskStatus(self):
5619
    """Try to revert the disk status after a failed migration.
5620

5621
    """
5622
    target_node = self.target_node
5623
    try:
5624
      self._EnsureSecondary(target_node)
5625
      self._GoStandalone()
5626
      self._GoReconnect(False)
5627
      self._WaitUntilSync()
5628
    except errors.OpExecError, err:
5629
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5630
                         " drives: error '%s'\n"
5631
                         "Please look and recover the instance status" %
5632
                         str(err))
5633

    
5634
  def _AbortMigration(self):
5635
    """Call the hypervisor code to abort a started migration.
5636

5637
    """
5638
    instance = self.instance
5639
    target_node = self.target_node
5640
    migration_info = self.migration_info
5641

    
5642
    abort_result = self.rpc.call_finalize_migration(target_node,
5643
                                                    instance,
5644
                                                    migration_info,
5645
                                                    False)
5646
    abort_msg = abort_result.fail_msg
5647
    if abort_msg:
5648
      logging.error("Aborting migration failed on target node %s: %s",
5649
                    target_node, abort_msg)
5650
      # Don't raise an exception here, as we still have to try to revert the
5651
      # disk status, even if this step failed.
5652

    
5653
  def _ExecMigration(self):
5654
    """Migrate an instance.
5655

5656
    The migrate is done by:
5657
      - change the disks into dual-master mode
5658
      - wait until disks are fully synchronized again
5659
      - migrate the instance
5660
      - change disks on the new secondary node (the old primary) to secondary
5661
      - wait until disks are fully synchronized
5662
      - change disks into single-master mode
5663

5664
    """
5665
    instance = self.instance
5666
    target_node = self.target_node
5667
    source_node = self.source_node
5668

    
5669
    self.feedback_fn("* checking disk consistency between source and target")
5670
    for dev in instance.disks:
5671
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5672
        raise errors.OpExecError("Disk %s is degraded or not fully"
5673
                                 " synchronized on target node,"
5674
                                 " aborting migrate." % dev.iv_name)
5675

    
5676
    # First get the migration information from the remote node
5677
    result = self.rpc.call_migration_info(source_node, instance)
5678
    msg = result.fail_msg
5679
    if msg:
5680
      log_err = ("Failed fetching source migration information from %s: %s" %
5681
                 (source_node, msg))
5682
      logging.error(log_err)
5683
      raise errors.OpExecError(log_err)
5684

    
5685
    self.migration_info = migration_info = result.payload
5686

    
5687
    # Then switch the disks to master/master mode
5688
    self._EnsureSecondary(target_node)
5689
    self._GoStandalone()
5690
    self._GoReconnect(True)
5691
    self._WaitUntilSync()
5692

    
5693
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5694
    result = self.rpc.call_accept_instance(target_node,
5695
                                           instance,
5696
                                           migration_info,
5697
                                           self.nodes_ip[target_node])
5698

    
5699
    msg = result.fail_msg
5700
    if msg:
5701
      logging.error("Instance pre-migration failed, trying to revert"
5702
                    " disk status: %s", msg)
5703
      self.feedback_fn("Pre-migration failed, aborting")
5704
      self._AbortMigration()
5705
      self._RevertDiskStatus()
5706
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5707
                               (instance.name, msg))
5708

    
5709
    self.feedback_fn("* migrating instance to %s" % target_node)
5710
    time.sleep(10)
5711
    result = self.rpc.call_instance_migrate(source_node, instance,
5712
                                            self.nodes_ip[target_node],
5713
                                            self.live)
5714
    msg = result.fail_msg
5715
    if msg:
5716
      logging.error("Instance migration failed, trying to revert"
5717
                    " disk status: %s", msg)
5718
      self.feedback_fn("Migration failed, aborting")
5719
      self._AbortMigration()
5720
      self._RevertDiskStatus()
5721
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5722
                               (instance.name, msg))
5723
    time.sleep(10)
5724

    
5725
    instance.primary_node = target_node
5726
    # distribute new instance config to the other nodes
5727
    self.cfg.Update(instance, self.feedback_fn)
5728

    
5729
    result = self.rpc.call_finalize_migration(target_node,
5730
                                              instance,
5731
                                              migration_info,
5732
                                              True)
5733
    msg = result.fail_msg
5734
    if msg:
5735
      logging.error("Instance migration succeeded, but finalization failed:"
5736
                    " %s", msg)
5737
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5738
                               msg)
5739

    
5740
    self._EnsureSecondary(source_node)
5741
    self._WaitUntilSync()
5742
    self._GoStandalone()
5743
    self._GoReconnect(False)
5744
    self._WaitUntilSync()
5745

    
5746
    self.feedback_fn("* done")
5747

    
5748
  def Exec(self, feedback_fn):
5749
    """Perform the migration.
5750

5751
    """
5752
    feedback_fn("Migrating instance %s" % self.instance.name)
5753

    
5754
    self.feedback_fn = feedback_fn
5755

    
5756
    self.source_node = self.instance.primary_node
5757
    self.target_node = self.instance.secondary_nodes[0]
5758
    self.all_nodes = [self.source_node, self.target_node]
5759
    self.nodes_ip = {
5760
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5761
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5762
      }
5763

    
5764
    if self.cleanup:
5765
      return self._ExecCleanup()
5766
    else:
5767
      return self._ExecMigration()
5768

    
5769

    
5770
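# Overview of the DRBD disk-state sequence driven by _ExecMigration above
# (descriptive summary only, no new behaviour):
#
#   _EnsureSecondary(target) -> _GoStandalone() -> _GoReconnect(True)
#       -> _WaitUntilSync()          # disks in dual-master mode for migration
#   call_instance_migrate(...)       # hypervisor moves the instance
#   _EnsureSecondary(source) -> _WaitUntilSync() -> _GoStandalone()
#       -> _GoReconnect(False) -> _WaitUntilSync()   # back to single-master
#
# If accept/migrate fails, _AbortMigration() asks the target node to drop the
# started migration and _RevertDiskStatus() replays the single-master sequence
# so the source node keeps sole access to the disks.
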
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


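# Example call (illustration only): creating one instance disk on its primary
# node, the way _CreateDisks() further below does it -- on the primary node
# both force_create and force_open are True:
#
#   info = _GetInstanceInfoText(instance)
#   _CreateBlockDev(lu, instance.primary_node, instance, disk,
#                   True, info, True)
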
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate logical volume names for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


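# Illustration (hypothetical values): for a two-disk instance created at
# base_index 0, _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns names
# of the form "<unique-id>.disk0" and "<unique-id>.disk1"; the DRBD template
# code below appends "_data"/"_meta" to each of them for the backing LVs.
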
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


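# Shape of the disk tree returned above (summary only):
#
#   LD_DRBD8, size=<size>, iv_name=<iv_name>,
#     logical_id=(primary, secondary, port, p_minor, s_minor, shared_secret)
#     children:
#       LD_LV (vgname, names[0]), size=<size>   # data volume
#       LD_LV (vgname, names[1]), size=128      # DRBD metadata volume
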
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


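# Example (illustration only): a plain, single-disk instance
#
#   _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
#                         "node1", [], [{"size": 1024, "mode": "rw"}],
#                         "", None, 0)
#
# returns a single LD_LV disk of the requested size in the cluster's volume
# group, named "<unique-id>.disk0" and exported as iv_name "disk/0"; the
# file_storage_dir/file_driver arguments are only used for DT_FILE.
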
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


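# Worked example: for two disks of sizes 1024 and 2048, DT_DRBD8 requires
# (1024 + 128) + (2048 + 128) = 3328 in the volume group, DT_PLAIN requires
# 1024 + 2048 = 3072, and DT_DISKLESS/DT_FILE have no volume group
# requirement (None).
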
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


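# Typical use (see LUCreateInstance.CheckPrereq below): validate the
# hypervisor parameters on the primary and secondary nodes before creating
# the instance, e.g.:
#
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
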
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # set optional parameters to None if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
                 "disk_template", "identify_defaults"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if not hasattr(self.op, "no_install"):
      self.op.no_install = False
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks: parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # verify creation mode
    if self.op.mode not in constants.INSTANCE_CREATE_MODES:
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = getattr(self.op, "source_handshake", None)
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = getattr(self.op, "source_x509_ca", None)
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = getattr(self.op, "source_instance_name", None)
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Read the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if int(ei_version) != constants.EXPORT_VERSION:
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    If the opcode doesn't specify (i.e. override) some instance
    parameters, try to take them from the export information, if
    it declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

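  # Summary of the export options consumed above (used only when the opcode
  # leaves the corresponding parameter unset):
  #   INISECT_EXP: "os"
  #   INISECT_INS: "disk_template", "disk_count", "disk<N>_size", "nic_count",
  #                "nic<N>_<param>" (including ip and mac), "hypervisor"
  #   INISECT_HYP / INISECT_BEP: per-parameter defaults, never overriding
  #   values explicitly given in the opcode
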
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have fewer disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of each disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


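# The replace-disks LU below supports the modes handled in
# TLReplaceDisks._CheckPrereq2 further down: REPLACE_DISK_PRI (rebuild the
# copy on the primary), REPLACE_DISK_SEC (rebuild on the current secondary),
# REPLACE_DISK_CHG (move the mirror to a new secondary, given explicitly or
# chosen by an iallocator) and REPLACE_DISK_AUTO (repair whichever side
# reports faulty disks).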
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True, self.op.early_release)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


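# Note: TLReplaceDisks below is the shared tasklet behind both LUReplaceDisks
# (a single instance) and LUEvacuateNode (every secondary instance of a node).
# A minimal sketch of how the callers above build it, assuming an already
# locked instance named "inst1.example.com" (hypothetical name):
#
#   replacer = TLReplaceDisks(self, "inst1.example.com",
#                             constants.REPLACE_DISK_CHG,
#                             self.op.iallocator, self.op.remote_node,
#                             [], False, self.op.early_release)
#   self.tasklets = [replacer]
#
# Locking itself stays with the calling LU, as the class docstring states.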
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because during node evacuation the iallocator was
    only called with an unmodified cluster model, not taking planned changes
    into account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

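  # Exec below dispatches on self.new_node: when a replacement secondary was
  # chosen (REPLACE_DISK_CHG), _ExecDrbd8Secondary moves the mirror to the new
  # node; otherwise _ExecDrbd8DiskOnly rebuilds the LVs in place on the
  # primary or the current secondary.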
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


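# A short note on the evacuation-strategy LU that follows: its Exec returns a
# list of [instance_name, new_secondary_node] pairs, built either directly
# from a user-supplied remote_node or computed by the iallocator in
# multi-evacuate (MEVAC) mode; presumably the client (e.g. gnt-node evacuate)
# then submits the actual replace-disks jobs based on this result.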
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


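# A minimal sketch of the parameters LUGrowDisk below expects from its opcode
# (field names taken from _OP_REQP; the opcode class name and the MiB unit are
# assumptions based on the usual Ganeti conventions, not shown in this file):
#
#   op = opcodes.OpGrowDisk(instance_name="inst1.example.com",
#                           disk=0,              # disk index, see FindDisk()
#                           amount=1024,         # amount to add, in MiB
#                           wait_for_sync=True)  # block until resync finishes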
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


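# The query LU below returns a dict keyed by instance name; a rough sketch of
# one entry (field names taken from the idict built in Exec, values invented
# purely for illustration):
#
#   {"name": "inst1.example.com", "config_state": "up", "run_state": "up",
#    "pnode": "node1", "snodes": ["node2"], "os": "debootstrap+default",
#    "disk_template": "drbd", "disks": [...], "nics": [...], ...}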
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


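# The modify LU below accepts NIC and disk changes as lists of
# (operation, parameters) pairs, where the operation is constants.DDM_ADD,
# constants.DDM_REMOVE or an integer index of the item to modify; a sketch
# (parameter values invented for illustration):
#
#   op.disks = [(constants.DDM_ADD, {"size": 1024,
#                                    "mode": constants.DISK_RDWR})]
#   op.nics  = [(0, {"ip": "none", "link": "br0"})]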
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    if not hasattr(self.op, "disk_template"):
      self.op.disk_template = None
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "os_name"):
      self.op.os_name = None
    if not hasattr(self.op, "force_variant"):
      self.op.force_variant = False
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8369
                                     " modifying an existing nic",
8370
                                     errors.ECODE_INVAL)
8371

    
8372
    if nic_addremove > 1:
8373
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8374
                                 " supported at a time", errors.ECODE_INVAL)
8375

    
8376
  def ExpandNames(self):
8377
    self._ExpandAndLockInstance()
8378
    self.needed_locks[locking.LEVEL_NODE] = []
8379
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8380

    
8381
  def DeclareLocks(self, level):
8382
    if level == locking.LEVEL_NODE:
8383
      self._LockInstancesNodes()
8384
      if self.op.disk_template and self.op.remote_node:
8385
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8386
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8387

    
8388
  def BuildHooksEnv(self):
8389
    """Build hooks env.
8390

8391
    This runs on the master, primary and secondaries.
8392

8393
    """
8394
    args = dict()
8395
    if constants.BE_MEMORY in self.be_new:
8396
      args['memory'] = self.be_new[constants.BE_MEMORY]
8397
    if constants.BE_VCPUS in self.be_new:
8398
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8399
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8400
    # information at all.
8401
    if self.op.nics:
8402
      args['nics'] = []
8403
      nic_override = dict(self.op.nics)
8404
      for idx, nic in enumerate(self.instance.nics):
8405
        if idx in nic_override:
8406
          this_nic_override = nic_override[idx]
8407
        else:
8408
          this_nic_override = {}
8409
        if 'ip' in this_nic_override:
8410
          ip = this_nic_override['ip']
8411
        else:
8412
          ip = nic.ip
8413
        if 'mac' in this_nic_override:
8414
          mac = this_nic_override['mac']
8415
        else:
8416
          mac = nic.mac
8417
        if idx in self.nic_pnew:
8418
          nicparams = self.nic_pnew[idx]
8419
        else:
8420
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8421
        mode = nicparams[constants.NIC_MODE]
8422
        link = nicparams[constants.NIC_LINK]
8423
        args['nics'].append((ip, mac, mode, link))
8424
      if constants.DDM_ADD in nic_override:
8425
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8426
        mac = nic_override[constants.DDM_ADD]['mac']
8427
        nicparams = self.nic_pnew[constants.DDM_ADD]
8428
        mode = nicparams[constants.NIC_MODE]
8429
        link = nicparams[constants.NIC_LINK]
8430
        args['nics'].append((ip, mac, mode, link))
8431
      elif constants.DDM_REMOVE in nic_override:
8432
        del args['nics'][-1]
8433

    
8434
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8435
    if self.op.disk_template:
8436
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8437
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8438
    return env, nl, nl
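    # The override built above ends up looking roughly like this (values are
    # illustrative only):
    #   args = {"memory": 512, "vcpus": 2,
    #           "nics": [("192.0.2.10", "aa:00:00:12:34:56",
    #                     constants.NIC_MODE_BRIDGED, "xen-br0")]}
    # i.e. every NIC is exported to the hooks as an (ip, mac, mode, link)
    # tuple.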
8439

    
8440
  def CheckPrereq(self):
8441
    """Check prerequisites.
8442

8443
    This only checks the instance list against the existing names.
8444

8445
    """
8446
    self.force = self.op.force
8447

    
8448
    # checking the new params on the primary/secondary nodes
8449

    
8450
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8451
    cluster = self.cluster = self.cfg.GetClusterInfo()
8452
    assert self.instance is not None, \
8453
      "Cannot retrieve locked instance %s" % self.op.instance_name
8454
    pnode = instance.primary_node
8455
    nodelist = list(instance.all_nodes)
8456

    
8457
    if self.op.disk_template:
8458
      if instance.disk_template == self.op.disk_template:
8459
        raise errors.OpPrereqError("Instance already has disk template %s" %
8460
                                   instance.disk_template, errors.ECODE_INVAL)
8461

    
8462
      if (instance.disk_template,
8463
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8464
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8465
                                   " %s to %s" % (instance.disk_template,
8466
                                                  self.op.disk_template),
8467
                                   errors.ECODE_INVAL)
8468
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8469
        _CheckNodeOnline(self, self.op.remote_node)
8470
        _CheckNodeNotDrained(self, self.op.remote_node)
8471
        disks = [{"size": d.size} for d in instance.disks]
8472
        required = _ComputeDiskSize(self.op.disk_template, disks)
8473
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8474
        _CheckInstanceDown(self, instance, "cannot change disk template")
8475

    
8476
    # hvparams processing
8477
    if self.op.hvparams:
8478
      hv_type = instance.hypervisor
8479
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8480
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8481
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8482

    
8483
      # local check
8484
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8485
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8486
      self.hv_new = hv_new # the new actual values
8487
      self.hv_inst = i_hvdict # the new dict (without defaults)
8488
    else:
8489
      self.hv_new = self.hv_inst = {}
8490

    
8491
    # beparams processing
8492
    if self.op.beparams:
8493
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams)
8494
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8495
      be_new = cluster.SimpleFillBE(i_bedict)
8496
      self.be_new = be_new # the new actual values
8497
      self.be_inst = i_bedict # the new dict (without defaults)
8498
    else:
8499
      self.be_new = self.be_inst = {}
8500

    
8501
    self.warn = []
8502

    
8503
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8504
      mem_check_list = [pnode]
8505
      if be_new[constants.BE_AUTO_BALANCE]:
8506
        # either we changed auto_balance to yes or it was from before
8507
        mem_check_list.extend(instance.secondary_nodes)
8508
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8509
                                                  instance.hypervisor)
8510
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8511
                                         instance.hypervisor)
8512
      pninfo = nodeinfo[pnode]
8513
      msg = pninfo.fail_msg
8514
      if msg:
8515
        # Assume the primary node is unreachable and go ahead
8516
        self.warn.append("Can't get info from primary node %s: %s" %
8517
                         (pnode, msg))
8518
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8519
        self.warn.append("Node data from primary node %s doesn't contain"
8520
                         " free memory information" % pnode)
8521
      elif instance_info.fail_msg:
8522
        self.warn.append("Can't get instance runtime information: %s" %
8523
                        instance_info.fail_msg)
8524
      else:
8525
        if instance_info.payload:
8526
          current_mem = int(instance_info.payload['memory'])
8527
        else:
8528
          # Assume instance not running
8529
          # (there is a slight race condition here, but it's not very probable,
8530
          # and we have no other way to check)
8531
          current_mem = 0
8532
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8533
                    pninfo.payload['memory_free'])
8534
        if miss_mem > 0:
8535
          raise errors.OpPrereqError("This change will prevent the instance"
8536
                                     " from starting, due to %d MB of memory"
8537
                                     " missing on its primary node" % miss_mem,
8538
                                     errors.ECODE_NORES)
8539

    
8540
      if be_new[constants.BE_AUTO_BALANCE]:
8541
        for node, nres in nodeinfo.items():
8542
          if node not in instance.secondary_nodes:
8543
            continue
8544
          msg = nres.fail_msg
8545
          if msg:
8546
            self.warn.append("Can't get info from secondary node %s: %s" %
8547
                             (node, msg))
8548
          elif not isinstance(nres.payload.get('memory_free', None), int):
8549
            self.warn.append("Secondary node %s didn't return free"
8550
                             " memory information" % node)
8551
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8552
            self.warn.append("Not enough memory to failover instance to"
8553
                             " secondary node %s" % node)
8554

    
8555
    # NIC processing
8556
    self.nic_pnew = {}
8557
    self.nic_pinst = {}
8558
    for nic_op, nic_dict in self.op.nics:
8559
      if nic_op == constants.DDM_REMOVE:
8560
        if not instance.nics:
8561
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8562
                                     errors.ECODE_INVAL)
8563
        continue
8564
      if nic_op != constants.DDM_ADD:
8565
        # an existing nic
8566
        if not instance.nics:
8567
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8568
                                     " no NICs" % nic_op,
8569
                                     errors.ECODE_INVAL)
8570
        if nic_op < 0 or nic_op >= len(instance.nics):
8571
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8572
                                     " are 0 to %d" %
8573
                                     (nic_op, len(instance.nics) - 1),
8574
                                     errors.ECODE_INVAL)
8575
        old_nic_params = instance.nics[nic_op].nicparams
8576
        old_nic_ip = instance.nics[nic_op].ip
8577
      else:
8578
        old_nic_params = {}
8579
        old_nic_ip = None
8580

    
8581
      update_params_dict = dict([(key, nic_dict[key])
8582
                                 for key in constants.NICS_PARAMETERS
8583
                                 if key in nic_dict])
8584

    
8585
      if 'bridge' in nic_dict:
8586
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8587

    
8588
      new_nic_params = _GetUpdatedParams(old_nic_params,
8589
                                         update_params_dict)
8590
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8591
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8592
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8593
      self.nic_pinst[nic_op] = new_nic_params
8594
      self.nic_pnew[nic_op] = new_filled_nic_params
8595
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8596

    
8597
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8598
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8599
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8600
        if msg:
8601
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8602
          if self.force:
8603
            self.warn.append(msg)
8604
          else:
8605
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8606
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8607
        if 'ip' in nic_dict:
8608
          nic_ip = nic_dict['ip']
8609
        else:
8610
          nic_ip = old_nic_ip
8611
        if nic_ip is None:
8612
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8613
                                     ' on a routed nic', errors.ECODE_INVAL)
8614
      if 'mac' in nic_dict:
8615
        nic_mac = nic_dict['mac']
8616
        if nic_mac is None:
8617
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8618
                                     errors.ECODE_INVAL)
8619
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8620
          # otherwise generate the mac
8621
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8622
        else:
8623
          # or validate/reserve the current one
8624
          try:
8625
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8626
          except errors.ReservationError:
8627
            raise errors.OpPrereqError("MAC address %s already in use"
8628
                                       " in cluster" % nic_mac,
8629
                                       errors.ECODE_NOTUNIQUE)
8630

    
8631
    # DISK processing
8632
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8633
      raise errors.OpPrereqError("Disk operations not supported for"
8634
                                 " diskless instances",
8635
                                 errors.ECODE_INVAL)
8636
    for disk_op, _ in self.op.disks:
8637
      if disk_op == constants.DDM_REMOVE:
8638
        if len(instance.disks) == 1:
8639
          raise errors.OpPrereqError("Cannot remove the last disk of"
8640
                                     " an instance", errors.ECODE_INVAL)
8641
        _CheckInstanceDown(self, instance, "cannot remove disks")
8642

    
8643
      if (disk_op == constants.DDM_ADD and
8644
          len(instance.disks) >= constants.MAX_DISKS):
8645
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8646
                                   " add more" % constants.MAX_DISKS,
8647
                                   errors.ECODE_STATE)
8648
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8649
        # an existing disk
8650
        if disk_op < 0 or disk_op >= len(instance.disks):
8651
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8652
                                     " are 0 to %d" %
8653
                                     (disk_op, len(instance.disks) - 1),
8654
                                     errors.ECODE_INVAL)
8655

    
8656
    # OS change
8657
    if self.op.os_name and not self.op.force:
8658
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8659
                      self.op.force_variant)
8660

    
8661
    return
8662

    
8663
  def _ConvertPlainToDrbd(self, feedback_fn):
8664
    """Converts an instance from plain to drbd.
8665

8666
    """
8667
    feedback_fn("Converting template to drbd")
8668
    instance = self.instance
8669
    pnode = instance.primary_node
8670
    snode = self.op.remote_node
8671

    
8672
    # create a fake disk info for _GenerateDiskTemplate
8673
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8674
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8675
                                      instance.name, pnode, [snode],
8676
                                      disk_info, None, None, 0)
8677
    info = _GetInstanceInfoText(instance)
8678
    feedback_fn("Creating aditional volumes...")
8679
    # first, create the missing data and meta devices
8680
    for disk in new_disks:
8681
      # unfortunately this is... not too nice
8682
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8683
                            info, True)
8684
      for child in disk.children:
8685
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8686
    # at this stage, all new LVs have been created, we can rename the
8687
    # old ones
8688
    feedback_fn("Renaming original volumes...")
8689
    rename_list = [(o, n.children[0].logical_id)
8690
                   for (o, n) in zip(instance.disks, new_disks)]
8691
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8692
    result.Raise("Failed to rename original LVs")
8693

    
8694
    feedback_fn("Initializing DRBD devices...")
8695
    # all child devices are in place, we can now create the DRBD devices
8696
    for disk in new_disks:
8697
      for node in [pnode, snode]:
8698
        f_create = node == pnode
8699
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8700

    
8701
    # at this point, the instance has been modified
8702
    instance.disk_template = constants.DT_DRBD8
8703
    instance.disks = new_disks
8704
    self.cfg.Update(instance, feedback_fn)
8705

    
8706
    # disks are created, waiting for sync
8707
    disk_abort = not _WaitForSync(self, instance)
8708
    if disk_abort:
8709
      raise errors.OpExecError("There are some degraded disks for"
8710
                               " this instance, please cleanup manually")
8711

    
8712
  def _ConvertDrbdToPlain(self, feedback_fn):
8713
    """Converts an instance from drbd to plain.
8714

8715
    """
8716
    instance = self.instance
8717
    assert len(instance.secondary_nodes) == 1
8718
    pnode = instance.primary_node
8719
    snode = instance.secondary_nodes[0]
8720
    feedback_fn("Converting template to plain")
8721

    
8722
    old_disks = instance.disks
8723
    new_disks = [d.children[0] for d in old_disks]
8724

    
8725
    # copy over size and mode
8726
    for parent, child in zip(old_disks, new_disks):
8727
      child.size = parent.size
8728
      child.mode = parent.mode
8729

    
8730
    # update instance structure
8731
    instance.disks = new_disks
8732
    instance.disk_template = constants.DT_PLAIN
8733
    self.cfg.Update(instance, feedback_fn)
8734

    
8735
    feedback_fn("Removing volumes on the secondary node...")
8736
    for disk in old_disks:
8737
      self.cfg.SetDiskID(disk, snode)
8738
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8739
      if msg:
8740
        self.LogWarning("Could not remove block device %s on node %s,"
8741
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8742

    
8743
    feedback_fn("Removing unneeded volumes on the primary node...")
8744
    for idx, disk in enumerate(old_disks):
8745
      meta = disk.children[1]
8746
      self.cfg.SetDiskID(meta, pnode)
8747
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8748
      if msg:
8749
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8750
                        " continuing anyway: %s", idx, pnode, msg)
8751

    
8752

    
8753
  def Exec(self, feedback_fn):
8754
    """Modifies an instance.
8755

8756
    All parameters take effect only at the next restart of the instance.
8757

8758
    """
8759
    # Process here the warnings from CheckPrereq, as we don't have a
8760
    # feedback_fn there.
8761
    for warn in self.warn:
8762
      feedback_fn("WARNING: %s" % warn)
8763

    
8764
    result = []
8765
    instance = self.instance
8766
    # disk changes
8767
    for disk_op, disk_dict in self.op.disks:
8768
      if disk_op == constants.DDM_REMOVE:
8769
        # remove the last disk
8770
        device = instance.disks.pop()
8771
        device_idx = len(instance.disks)
8772
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8773
          self.cfg.SetDiskID(disk, node)
8774
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8775
          if msg:
8776
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8777
                            " continuing anyway", device_idx, node, msg)
8778
        result.append(("disk/%d" % device_idx, "remove"))
8779
      elif disk_op == constants.DDM_ADD:
8780
        # add a new disk
8781
        if instance.disk_template == constants.DT_FILE:
8782
          file_driver, file_path = instance.disks[0].logical_id
8783
          file_path = os.path.dirname(file_path)
8784
        else:
8785
          file_driver = file_path = None
8786
        disk_idx_base = len(instance.disks)
8787
        new_disk = _GenerateDiskTemplate(self,
8788
                                         instance.disk_template,
8789
                                         instance.name, instance.primary_node,
8790
                                         instance.secondary_nodes,
8791
                                         [disk_dict],
8792
                                         file_path,
8793
                                         file_driver,
8794
                                         disk_idx_base)[0]
8795
        instance.disks.append(new_disk)
8796
        info = _GetInstanceInfoText(instance)
8797

    
8798
        logging.info("Creating volume %s for instance %s",
8799
                     new_disk.iv_name, instance.name)
8800
        # Note: this needs to be kept in sync with _CreateDisks
8801
        #HARDCODE
8802
        for node in instance.all_nodes:
8803
          f_create = node == instance.primary_node
8804
          try:
8805
            _CreateBlockDev(self, node, instance, new_disk,
8806
                            f_create, info, f_create)
8807
          except errors.OpExecError, err:
8808
            self.LogWarning("Failed to create volume %s (%s) on"
8809
                            " node %s: %s",
8810
                            new_disk.iv_name, new_disk, node, err)
8811
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8812
                       (new_disk.size, new_disk.mode)))
8813
      else:
8814
        # change a given disk
8815
        instance.disks[disk_op].mode = disk_dict['mode']
8816
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8817

    
8818
    if self.op.disk_template:
8819
      r_shut = _ShutdownInstanceDisks(self, instance)
8820
      if not r_shut:
8821
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8822
                                 " proceed with disk template conversion")
8823
      mode = (instance.disk_template, self.op.disk_template)
8824
      try:
8825
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8826
      except:
8827
        self.cfg.ReleaseDRBDMinors(instance.name)
8828
        raise
8829
      result.append(("disk_template", self.op.disk_template))
8830

    
8831
    # NIC changes
8832
    for nic_op, nic_dict in self.op.nics:
8833
      if nic_op == constants.DDM_REMOVE:
8834
        # remove the last nic
8835
        del instance.nics[-1]
8836
        result.append(("nic.%d" % len(instance.nics), "remove"))
8837
      elif nic_op == constants.DDM_ADD:
8838
        # mac and bridge should be set, by now
8839
        mac = nic_dict['mac']
8840
        ip = nic_dict.get('ip', None)
8841
        nicparams = self.nic_pinst[constants.DDM_ADD]
8842
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8843
        instance.nics.append(new_nic)
8844
        result.append(("nic.%d" % (len(instance.nics) - 1),
8845
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8846
                       (new_nic.mac, new_nic.ip,
8847
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8848
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8849
                       )))
8850
      else:
8851
        for key in 'mac', 'ip':
8852
          if key in nic_dict:
8853
            setattr(instance.nics[nic_op], key, nic_dict[key])
8854
        if nic_op in self.nic_pinst:
8855
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8856
        for key, val in nic_dict.iteritems():
8857
          result.append(("nic.%s/%d" % (key, nic_op), val))
8858

    
8859
    # hvparams changes
8860
    if self.op.hvparams:
8861
      instance.hvparams = self.hv_inst
8862
      for key, val in self.op.hvparams.iteritems():
8863
        result.append(("hv/%s" % key, val))
8864

    
8865
    # beparams changes
8866
    if self.op.beparams:
8867
      instance.beparams = self.be_inst
8868
      for key, val in self.op.beparams.iteritems():
8869
        result.append(("be/%s" % key, val))
8870

    
8871
    # OS change
8872
    if self.op.os_name:
8873
      instance.os = self.op.os_name
8874

    
8875
    self.cfg.Update(instance, feedback_fn)
8876

    
8877
    return result
8878

    
8879
  _DISK_CONVERSIONS = {
8880
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8881
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8882
    }
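  # The values above are plain function objects (the dictionary is built
  # while the class body is still executing), which is why Exec() passes
  # the LU instance explicitly, roughly:
  #   mode = (instance.disk_template, self.op.disk_template)
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)
  # Supporting another conversion only requires registering a matching
  # (old_template, new_template) pair here.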
8883

    
8884

    
8885
class LUQueryExports(NoHooksLU):
8886
  """Query the exports list
8887

8888
  """
8889
  _OP_REQP = ['nodes']
8890
  REQ_BGL = False
8891

    
8892
  def ExpandNames(self):
8893
    self.needed_locks = {}
8894
    self.share_locks[locking.LEVEL_NODE] = 1
8895
    if not self.op.nodes:
8896
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8897
    else:
8898
      self.needed_locks[locking.LEVEL_NODE] = \
8899
        _GetWantedNodes(self, self.op.nodes)
8900

    
8901
  def CheckPrereq(self):
8902
    """Check prerequisites.
8903

8904
    """
8905
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8906

    
8907
  def Exec(self, feedback_fn):
8908
    """Compute the list of all the exported system images.
8909

8910
    @rtype: dict
8911
    @return: a dictionary with the structure node->(export-list)
8912
        where export-list is a list of the instances exported on
8913
        that node.
8914

8915
    """
8916
    rpcresult = self.rpc.call_export_list(self.nodes)
8917
    result = {}
8918
    for node in rpcresult:
8919
      if rpcresult[node].fail_msg:
8920
        result[node] = False
8921
      else:
8922
        result[node] = rpcresult[node].payload
8923

    
8924
    return result
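    # The returned structure therefore looks like this (node names are
    # examples):
    #   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
    #    "node2.example.com": False}
    # with False marking nodes whose export list could not be fetched.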
8925

    
8926

    
8927
class LUPrepareExport(NoHooksLU):
8928
  """Prepares an instance for an export and returns useful information.
8929

8930
  """
8931
  _OP_REQP = ["instance_name", "mode"]
8932
  REQ_BGL = False
8933

    
8934
  def CheckArguments(self):
8935
    """Check the arguments.
8936

8937
    """
8938
    if self.op.mode not in constants.EXPORT_MODES:
8939
      raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
8940
                                 errors.ECODE_INVAL)
8941

    
8942
  def ExpandNames(self):
8943
    self._ExpandAndLockInstance()
8944

    
8945
  def CheckPrereq(self):
8946
    """Check prerequisites.
8947

8948
    """
8949
    instance_name = self.op.instance_name
8950

    
8951
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8952
    assert self.instance is not None, \
8953
          "Cannot retrieve locked instance %s" % self.op.instance_name
8954
    _CheckNodeOnline(self, self.instance.primary_node)
8955

    
8956
    self._cds = _GetClusterDomainSecret()
8957

    
8958
  def Exec(self, feedback_fn):
8959
    """Prepares an instance for an export.
8960

8961
    """
8962
    instance = self.instance
8963

    
8964
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
8965
      salt = utils.GenerateSecret(8)
8966

    
8967
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
8968
      result = self.rpc.call_x509_cert_create(instance.primary_node,
8969
                                              constants.RIE_CERT_VALIDITY)
8970
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
8971

    
8972
      (name, cert_pem) = result.payload
8973

    
8974
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
8975
                                             cert_pem)
8976

    
8977
      return {
8978
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
8979
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
8980
                          salt),
8981
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
8982
        }
8983

    
8984
    return None
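    # For remote mode the caller thus receives, roughly:
    #   {"handshake": <handshake data>,
    #    "x509_key_name": (key_name, hmac_digest, salt),
    #    "x509_ca": <signed X509 CA certificate, PEM encoded>}
    # which is what LUExportInstance later expects in its "x509_key_name"
    # and "destination_x509_ca" parameters.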
8985

    
8986

    
8987
class LUExportInstance(LogicalUnit):
8988
  """Export an instance to an image in the cluster.
8989

8990
  """
8991
  HPATH = "instance-export"
8992
  HTYPE = constants.HTYPE_INSTANCE
8993
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8994
  REQ_BGL = False
8995

    
8996
  def CheckArguments(self):
8997
    """Check the arguments.
8998

8999
    """
9000
    _CheckBooleanOpField(self.op, "remove_instance")
9001
    _CheckBooleanOpField(self.op, "ignore_remove_failures")
9002

    
9003
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
9004
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
9005
    self.remove_instance = getattr(self.op, "remove_instance", False)
9006
    self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
9007
                                          False)
9008
    self.export_mode = getattr(self.op, "mode", constants.EXPORT_MODE_LOCAL)
9009
    self.x509_key_name = getattr(self.op, "x509_key_name", None)
9010
    self.dest_x509_ca_pem = getattr(self.op, "destination_x509_ca", None)
9011

    
9012
    if self.remove_instance and not self.op.shutdown:
9013
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9014
                                 " down before")
9015

    
9016
    if self.export_mode not in constants.EXPORT_MODES:
9017
      raise errors.OpPrereqError("Invalid export mode %r" % self.export_mode,
9018
                                 errors.ECODE_INVAL)
9019

    
9020
    if self.export_mode == constants.EXPORT_MODE_REMOTE:
9021
      if not self.x509_key_name:
9022
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9023
                                   errors.ECODE_INVAL)
9024

    
9025
      if not self.dest_x509_ca_pem:
9026
        raise errors.OpPrereqError("Missing destination X509 CA",
9027
                                   errors.ECODE_INVAL)
9028

    
9029
  def ExpandNames(self):
9030
    self._ExpandAndLockInstance()
9031

    
9032
    # Lock all nodes for local exports
9033
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9034
      # FIXME: lock only instance primary and destination node
9035
      #
9036
      # Sad but true, for now we have to lock all nodes, as we don't know where
9037
      # the previous export might be, and in this LU we search for it and
9038
      # remove it from its current node. In the future we could fix this by:
9039
      #  - making a tasklet to search (share-lock all), then create the new one,
9040
      #    then one to remove it afterwards
9041
      #  - removing the removal operation altogether
9042
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9043

    
9044
  def DeclareLocks(self, level):
9045
    """Last minute lock declaration."""
9046
    # All nodes are locked anyway, so nothing to do here.
9047

    
9048
  def BuildHooksEnv(self):
9049
    """Build hooks env.
9050

9051
    This will run on the master, primary node and target node.
9052

9053
    """
9054
    env = {
9055
      "EXPORT_MODE": self.export_mode,
9056
      "EXPORT_NODE": self.op.target_node,
9057
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9058
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
9059
      # TODO: Generic function for boolean env variables
9060
      "REMOVE_INSTANCE": str(bool(self.remove_instance)),
9061
      }
9062

    
9063
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9064

    
9065
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9066

    
9067
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9068
      nl.append(self.op.target_node)
9069

    
9070
    return env, nl, nl
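    # As a sketch, a local export produces a hook environment along these
    # lines (values illustrative):
    #   {"EXPORT_MODE": constants.EXPORT_MODE_LOCAL,
    #    "EXPORT_NODE": "node3.example.com",
    #    "EXPORT_DO_SHUTDOWN": True,
    #    "SHUTDOWN_TIMEOUT": constants.DEFAULT_SHUTDOWN_TIMEOUT,
    #    "REMOVE_INSTANCE": "False"}
    # plus the per-instance variables from _BuildInstanceHookEnvByObject.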
9071

    
9072
  def CheckPrereq(self):
9073
    """Check prerequisites.
9074

9075
    This checks that the instance and node names are valid.
9076

9077
    """
9078
    instance_name = self.op.instance_name
9079

    
9080
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9081
    assert self.instance is not None, \
9082
          "Cannot retrieve locked instance %s" % self.op.instance_name
9083
    _CheckNodeOnline(self, self.instance.primary_node)
9084

    
9085
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9086
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9087
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9088
      assert self.dst_node is not None
9089

    
9090
      _CheckNodeOnline(self, self.dst_node.name)
9091
      _CheckNodeNotDrained(self, self.dst_node.name)
9092

    
9093
      self._cds = None
9094
      self.dest_disk_info = None
9095
      self.dest_x509_ca = None
9096

    
9097
    elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9098
      self.dst_node = None
9099

    
9100
      if len(self.op.target_node) != len(self.instance.disks):
9101
        raise errors.OpPrereqError(("Received destination information for %s"
9102
                                    " disks, but instance %s has %s disks") %
9103
                                   (len(self.op.target_node), instance_name,
9104
                                    len(self.instance.disks)),
9105
                                   errors.ECODE_INVAL)
9106

    
9107
      cds = _GetClusterDomainSecret()
9108

    
9109
      # Check X509 key name
9110
      try:
9111
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9112
      except (TypeError, ValueError), err:
9113
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9114

    
9115
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9116
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9117
                                   errors.ECODE_INVAL)
9118

    
9119
      # Load and verify CA
9120
      try:
9121
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9122
      except OpenSSL.crypto.Error, err:
9123
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9124
                                   (err, ), errors.ECODE_INVAL)
9125

    
9126
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9127
      if errcode is not None:
9128
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ),
9129
                                   errors.ECODE_INVAL)
9130

    
9131
      self.dest_x509_ca = cert
9132

    
9133
      # Verify target information
9134
      disk_info = []
9135
      for idx, disk_data in enumerate(self.op.target_node):
9136
        try:
9137
          (host, port, magic) = \
9138
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9139
        except errors.GenericError, err:
9140
          raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err),
9141
                                     errors.ECODE_INVAL)
9142

    
9143
        disk_info.append((host, port, magic))
9144

    
9145
      assert len(disk_info) == len(self.op.target_node)
9146
      self.dest_disk_info = disk_info
9147

    
9148
    else:
9149
      raise errors.ProgrammerError("Unhandled export mode %r" %
9150
                                   self.export_mode)
9151

    
9152
    # instance disk type verification
9153
    # TODO: Implement export support for file-based disks
9154
    for disk in self.instance.disks:
9155
      if disk.dev_type == constants.LD_FILE:
9156
        raise errors.OpPrereqError("Export not supported for instances with"
9157
                                   " file-based disks", errors.ECODE_INVAL)
9158

    
9159
  def _CleanupExports(self, feedback_fn):
9160
    """Removes exports of current instance from all other nodes.
9161

9162
    If an instance in a cluster with nodes A..D was exported to node C, its
9163
    exports will be removed from the nodes A, B and D.
9164

9165
    """
9166
    assert self.export_mode != constants.EXPORT_MODE_REMOTE
9167

    
9168
    nodelist = self.cfg.GetNodeList()
9169
    nodelist.remove(self.dst_node.name)
9170

    
9171
    # on one-node clusters nodelist will be empty after the removal
9172
    # if we proceed the backup would be removed because OpQueryExports
9173
    # substitutes an empty list with the full cluster node list.
9174
    iname = self.instance.name
9175
    if nodelist:
9176
      feedback_fn("Removing old exports for instance %s" % iname)
9177
      exportlist = self.rpc.call_export_list(nodelist)
9178
      for node in exportlist:
9179
        if exportlist[node].fail_msg:
9180
          continue
9181
        if iname in exportlist[node].payload:
9182
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9183
          if msg:
9184
            self.LogWarning("Could not remove older export for instance %s"
9185
                            " on node %s: %s", iname, node, msg)
9186

    
9187
  def Exec(self, feedback_fn):
9188
    """Export an instance to an image in the cluster.
9189

9190
    """
9191
    assert self.export_mode in constants.EXPORT_MODES
9192

    
9193
    instance = self.instance
9194
    src_node = instance.primary_node
9195

    
9196
    if self.op.shutdown:
9197
      # shutdown the instance, but not the disks
9198
      feedback_fn("Shutting down instance %s" % instance.name)
9199
      result = self.rpc.call_instance_shutdown(src_node, instance,
9200
                                               self.shutdown_timeout)
9201
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9202
      result.Raise("Could not shutdown instance %s on"
9203
                   " node %s" % (instance.name, src_node))
9204

    
9205
    # set the disks ID correctly since call_instance_start needs the
9206
    # correct drbd minor to create the symlinks
9207
    for disk in instance.disks:
9208
      self.cfg.SetDiskID(disk, src_node)
9209

    
9210
    activate_disks = (not instance.admin_up)
9211

    
9212
    if activate_disks:
9213
      # Activate the instance disks if we're exporting a stopped instance
9214
      feedback_fn("Activating disks for %s" % instance.name)
9215
      _StartInstanceDisks(self, instance, None)
9216

    
9217
    try:
9218
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9219
                                                     instance)
9220

    
9221
      helper.CreateSnapshots()
9222
      try:
9223
        if (self.op.shutdown and instance.admin_up and
9224
            not self.remove_instance):
9225
          assert not activate_disks
9226
          feedback_fn("Starting instance %s" % instance.name)
9227
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9228
          msg = result.fail_msg
9229
          if msg:
9230
            feedback_fn("Failed to start instance: %s" % msg)
9231
            _ShutdownInstanceDisks(self, instance)
9232
            raise errors.OpExecError("Could not start instance: %s" % msg)
9233

    
9234
        if self.export_mode == constants.EXPORT_MODE_LOCAL:
9235
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9236
        elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9237
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9238
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9239

    
9240
          (key_name, _, _) = self.x509_key_name
9241

    
9242
          dest_ca_pem = \
9243
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9244
                                            self.dest_x509_ca)
9245

    
9246
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9247
                                                     key_name, dest_ca_pem,
9248
                                                     timeouts)
9249
      finally:
9250
        helper.Cleanup()
9251

    
9252
      # Check for backwards compatibility
9253
      assert len(dresults) == len(instance.disks)
9254
      assert compat.all(isinstance(i, bool) for i in dresults), \
9255
             "Not all results are boolean: %r" % dresults
9256

    
9257
    finally:
9258
      if activate_disks:
9259
        feedback_fn("Deactivating disks for %s" % instance.name)
9260
        _ShutdownInstanceDisks(self, instance)
9261

    
9262
    # Remove instance if requested
9263
    if self.remove_instance:
9264
      if not (compat.all(dresults) and fin_resu):
9265
        feedback_fn("Not removing instance %s as parts of the export failed" %
9266
                    instance.name)
9267
      else:
9268
        feedback_fn("Removing instance %s" % instance.name)
9269
        _RemoveInstance(self, feedback_fn, instance,
9270
                        self.ignore_remove_failures)
9271

    
9272
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9273
      self._CleanupExports(feedback_fn)
9274

    
9275
    return fin_resu, dresults
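    # The LU result is thus a (fin_resu, dresults) pair, e.g.
    # (True, [True, True]) for a successfully exported two-disk instance:
    # one boolean per disk plus the overall export status.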
9276

    
9277

    
9278
class LURemoveExport(NoHooksLU):
9279
  """Remove exports related to the named instance.
9280

9281
  """
9282
  _OP_REQP = ["instance_name"]
9283
  REQ_BGL = False
9284

    
9285
  def ExpandNames(self):
9286
    self.needed_locks = {}
9287
    # We need all nodes to be locked in order for RemoveExport to work, but we
9288
    # don't need to lock the instance itself, as nothing will happen to it (and
9289
    # we can remove exports also for a removed instance)
9290
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9291

    
9292
  def CheckPrereq(self):
9293
    """Check prerequisites.
9294
    """
9295
    pass
9296

    
9297
  def Exec(self, feedback_fn):
9298
    """Remove any export.
9299

9300
    """
9301
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9302
    # If the instance was not found we'll try with the name that was passed in.
9303
    # This will only work if it was an FQDN, though.
9304
    fqdn_warn = False
9305
    if not instance_name:
9306
      fqdn_warn = True
9307
      instance_name = self.op.instance_name
9308

    
9309
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9310
    exportlist = self.rpc.call_export_list(locked_nodes)
9311
    found = False
9312
    for node in exportlist:
9313
      msg = exportlist[node].fail_msg
9314
      if msg:
9315
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9316
        continue
9317
      if instance_name in exportlist[node].payload:
9318
        found = True
9319
        result = self.rpc.call_export_remove(node, instance_name)
9320
        msg = result.fail_msg
9321
        if msg:
9322
          logging.error("Could not remove export for instance %s"
9323
                        " on node %s: %s", instance_name, node, msg)
9324

    
9325
    if fqdn_warn and not found:
9326
      feedback_fn("Export not found. If trying to remove an export belonging"
9327
                  " to a deleted instance please use its Fully Qualified"
9328
                  " Domain Name.")
9329

    
9330

    
9331
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9332
  """Generic tags LU.
9333

9334
  This is an abstract class which is the parent of all the other tags LUs.
9335

9336
  """
9337

    
9338
  def ExpandNames(self):
9339
    self.needed_locks = {}
9340
    if self.op.kind == constants.TAG_NODE:
9341
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9342
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9343
    elif self.op.kind == constants.TAG_INSTANCE:
9344
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9345
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9346

    
9347
  def CheckPrereq(self):
9348
    """Check prerequisites.
9349

9350
    """
9351
    if self.op.kind == constants.TAG_CLUSTER:
9352
      self.target = self.cfg.GetClusterInfo()
9353
    elif self.op.kind == constants.TAG_NODE:
9354
      self.target = self.cfg.GetNodeInfo(self.op.name)
9355
    elif self.op.kind == constants.TAG_INSTANCE:
9356
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9357
    else:
9358
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9359
                                 str(self.op.kind), errors.ECODE_INVAL)
9360

    
9361

    
9362
class LUGetTags(TagsLU):
9363
  """Returns the tags of a given object.
9364

9365
  """
9366
  _OP_REQP = ["kind", "name"]
9367
  REQ_BGL = False
9368

    
9369
  def Exec(self, feedback_fn):
9370
    """Returns the tag list.
9371

9372
    """
9373
    return list(self.target.GetTags())
9374

    
9375

    
9376
class LUSearchTags(NoHooksLU):
9377
  """Searches the tags for a given pattern.
9378

9379
  """
9380
  _OP_REQP = ["pattern"]
9381
  REQ_BGL = False
9382

    
9383
  def ExpandNames(self):
9384
    self.needed_locks = {}
9385

    
9386
  def CheckPrereq(self):
9387
    """Check prerequisites.
9388

9389
    This checks the pattern passed for validity by compiling it.
9390

9391
    """
9392
    try:
9393
      self.re = re.compile(self.op.pattern)
9394
    except re.error, err:
9395
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9396
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9397

    
9398
  def Exec(self, feedback_fn):
9399
    """Returns the tag list.
9400

9401
    """
9402
    cfg = self.cfg
9403
    tgts = [("/cluster", cfg.GetClusterInfo())]
9404
    ilist = cfg.GetAllInstancesInfo().values()
9405
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9406
    nlist = cfg.GetAllNodesInfo().values()
9407
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9408
    results = []
9409
    for path, target in tgts:
9410
      for tag in target.GetTags():
9411
        if self.re.search(tag):
9412
          results.append((path, tag))
9413
    return results
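    # The result is a list of (path, tag) pairs, for example (tags made up):
    #   [("/cluster", "critical"),
    #    ("/instances/inst1.example.com", "critical")]
    # for a pattern such as "^crit".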
9414

    
9415

    
9416
class LUAddTags(TagsLU):
9417
  """Sets a tag on a given object.
9418

9419
  """
9420
  _OP_REQP = ["kind", "name", "tags"]
9421
  REQ_BGL = False
9422

    
9423
  def CheckPrereq(self):
9424
    """Check prerequisites.
9425

9426
    This checks the type and length of the tag name and value.
9427

9428
    """
9429
    TagsLU.CheckPrereq(self)
9430
    for tag in self.op.tags:
9431
      objects.TaggableObject.ValidateTag(tag)
9432

    
9433
  def Exec(self, feedback_fn):
9434
    """Sets the tag.
9435

9436
    """
9437
    try:
9438
      for tag in self.op.tags:
9439
        self.target.AddTag(tag)
9440
    except errors.TagError, err:
9441
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9442
    self.cfg.Update(self.target, feedback_fn)
9443

    
9444

    
9445
class LUDelTags(TagsLU):
9446
  """Delete a list of tags from a given object.
9447

9448
  """
9449
  _OP_REQP = ["kind", "name", "tags"]
9450
  REQ_BGL = False
9451

    
9452
  def CheckPrereq(self):
9453
    """Check prerequisites.
9454

9455
    This checks that we have the given tag.
9456

9457
    """
9458
    TagsLU.CheckPrereq(self)
9459
    for tag in self.op.tags:
9460
      objects.TaggableObject.ValidateTag(tag)
9461
    del_tags = frozenset(self.op.tags)
9462
    cur_tags = self.target.GetTags()
9463
    if not del_tags <= cur_tags:
9464
      diff_tags = del_tags - cur_tags
9465
      diff_names = ["'%s'" % tag for tag in diff_tags]
9466
      diff_names.sort()
9467
      raise errors.OpPrereqError("Tag(s) %s not found" %
9468
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9469

    
9470
  def Exec(self, feedback_fn):
9471
    """Remove the tag from the object.
9472

9473
    """
9474
    for tag in self.op.tags:
9475
      self.target.RemoveTag(tag)
9476
    self.cfg.Update(self.target, feedback_fn)
9477

    
9478

    
9479
class LUTestDelay(NoHooksLU):
9480
  """Sleep for a specified amount of time.
9481

9482
  This LU sleeps on the master and/or nodes for a specified amount of
9483
  time.
9484

9485
  """
9486
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9487
  REQ_BGL = False
9488

    
9489
  def CheckArguments(self):
9490
    # TODO: convert to the type system
9491
    self.op.repeat = getattr(self.op, "repeat", 0)
9492
    if self.op.repeat < 0:
9493
      raise errors.OpPrereqError("Repetition count cannot be negative")
9494

    
9495
  def ExpandNames(self):
9496
    """Expand names and set required locks.
9497

9498
    This expands the node list, if any.
9499

9500
    """
9501
    self.needed_locks = {}
9502
    if self.op.on_nodes:
9503
      # _GetWantedNodes can be used here, but is not always appropriate to use
9504
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9505
      # more information.
9506
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9507
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9508

    
9509
  def CheckPrereq(self):
9510
    """Check prerequisites.
9511

9512
    """
9513

    
9514
  def _TestDelay(self):
9515
    """Do the actual sleep.
9516

9517
    """
9518
    if self.op.on_master:
9519
      if not utils.TestDelay(self.op.duration):
9520
        raise errors.OpExecError("Error during master delay test")
9521
    if self.op.on_nodes:
9522
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9523
      for node, node_result in result.items():
9524
        node_result.Raise("Failure during rpc call to node %s" % node)
9525

    
9526
  def Exec(self, feedback_fn):
9527
    """Execute the test delay opcode, with the wanted repetitions.
9528

9529
    """
9530
    if self.op.repeat == 0:
9531
      self._TestDelay()
9532
    else:
9533
      top_value = self.op.repeat - 1
9534
      for i in range(self.op.repeat):
9535
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9536
        self._TestDelay()
9537

    
9538

    
9539
class IAllocator(object):
9540
  """IAllocator framework.
9541

9542
  An IAllocator instance has four sets of attributes:
9543
    - cfg that is needed to query the cluster
9544
    - input data (all members of the _KEYS class attribute are required)
9545
    - four buffer attributes (in|out_data|text), that represent the
9546
      input (to the external script) in text and data structure format,
9547
      and the output from it, again in two formats
9548
    - the result variables from the script (success, info, result) for
9549
      easy usage
9550

9551
  """
9552
  # pylint: disable-msg=R0902
9553
  # lots of instance attributes
9554
  _ALLO_KEYS = [
9555
    "name", "mem_size", "disks", "disk_template",
9556
    "os", "tags", "nics", "vcpus", "hypervisor",
9557
    ]
9558
  _RELO_KEYS = [
9559
    "name", "relocate_from",
9560
    ]
9561
  _EVAC_KEYS = [
9562
    "evac_nodes",
9563
    ]
9564

    
9565
  def __init__(self, cfg, rpc, mode, **kwargs):
9566
    self.cfg = cfg
9567
    self.rpc = rpc
9568
    # init buffer variables
9569
    self.in_text = self.out_text = self.in_data = self.out_data = None
9570
    # init all input fields so that pylint is happy
9571
    self.mode = mode
9572
    self.mem_size = self.disks = self.disk_template = None
9573
    self.os = self.tags = self.nics = self.vcpus = None
9574
    self.hypervisor = None
9575
    self.relocate_from = None
9576
    self.name = None
9577
    self.evac_nodes = None
9578
    # computed fields
9579
    self.required_nodes = None
9580
    # init result fields
9581
    self.success = self.info = self.result = None
9582
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9583
      keyset = self._ALLO_KEYS
9584
      fn = self._AddNewInstance
9585
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9586
      keyset = self._RELO_KEYS
9587
      fn = self._AddRelocateInstance
9588
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9589
      keyset = self._EVAC_KEYS
9590
      fn = self._AddEvacuateNodes
9591
    else:
9592
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9593
                                   " IAllocator" % self.mode)
9594
    for key in kwargs:
9595
      if key not in keyset:
9596
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9597
                                     " IAllocator" % key)
9598
      setattr(self, key, kwargs[key])
9599

    
9600
    for key in keyset:
9601
      if key not in kwargs:
9602
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9603
                                     " IAllocator" % key)
9604
    self._BuildInputData(fn)
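  # As a usage sketch, the simplest request is a multi-node evacuation,
  # which only needs the _EVAC_KEYS (node name made up):
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_MEVAC,
  #                    evac_nodes=["node3.example.com"])
  # Allocation and relocation requests work the same way but must supply
  # every key in _ALLO_KEYS or _RELO_KEYS respectively.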
9605

    
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic (runtime) node data, as reported by the node itself
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

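  # Editor's sketch: the structure built by _ComputeClusterData above looks
  # roughly like this (abridged, values hypothetical):
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "enabled_hypervisors": ["xen-pvm"],
  #     "nodes": {"node1.example.com": {"total_memory": 4096, ...}, ...},
  #     "instances": {"inst1.example.com": {"memory": 512, ...}, ...},
  #     }
  # The mode-specific "request" sub-dict returned by the _Add* methods below
  # is attached later by _BuildInputData.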
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

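  # Editor's sketch: for a DRBD-based instance the request returned by
  # _AddNewInstance above would look roughly like (values hypothetical):
  #   {"name": "inst1.example.com", "disk_template": "drbd",
  #    "tags": [], "os": "debian-image", "vcpus": 1, "memory": 512,
  #    "disks": [{"size": 10240, "mode": "w"}],
  #    "disk_space_total": <result of _ComputeDiskSize>,
  #    "nics": [...], "required_nodes": 2}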
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

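  # Editor's sketch: a relocation request (built by _AddRelocateInstance
  # above) only carries the instance name, its disk space requirements and
  # the node(s) to move away from, e.g. (values hypothetical):
  #   {"name": "inst1.example.com", "disk_space_total": <computed>,
  #    "required_nodes": 1, "relocate_from": ["node2.example.com"]}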
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

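  # Editor's note: after _BuildInputData the serialized text contains the
  # generic cluster data plus a mode-specific "request" sub-dict whose "type"
  # key mirrors self.mode, e.g. (abridged):
  #   {"version": ..., "nodes": {...}, "instances": {...},
  #    "request": {"type": <self.mode>, ...}}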
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


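# Editor's illustrative sketch (not used by any LU in this module): a typical
# driving sequence for the IAllocator class above. The allocator name "hail"
# and the parameter names are assumptions for illustration only.
def _ExampleRunIAllocator(cfg, rpc, instance_name, secondary_nodes):
  """Sketch: ask an external allocator where to relocate an instance.

  This mirrors how LUs in this module drive IAllocator; it is illustrative
  only and not the canonical call sequence.

  """
  ial = IAllocator(cfg, rpc,
                   mode=constants.IALLOCATOR_MODE_RELOC,
                   name=instance_name,
                   relocate_from=list(secondary_nodes))
  ial.Run("hail")  # assumed allocator script name
  if not ial.success:
    raise errors.OpExecError("Allocator failed: %s" % ial.info)
  return ial.result  # list of target node names chosen by the script

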
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

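  # Editor's note: for the "in" direction Exec below simply returns the
  # serialized allocator input; for the "out" direction it runs the allocator
  # named in the opcode and returns the script's raw (unvalidated) output.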
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
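

# Editor's illustrative sketch: a minimal allocator reply that would be
# accepted by IAllocator._ValidateResult above. The node name is made up.
def _ExampleIAllocatorReply():
  """Return a hypothetical, well-formed iallocator response as JSON text."""
  reply = {
    "success": True,
    "info": "allocation successful",
    "result": ["node3.example.com"],  # list of chosen node names
    }
  return serializer.Dump(reply)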